blob: 444550917bc1f6f89a297cf20868f4b31de4933c [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * PACKET - implements raw packet sockets.
7 *
8 * Version: $Id: af_packet.c,v 1.61 2002/02/08 03:57:19 davem Exp $
9 *
Jesper Juhl02c30a82005-05-05 16:16:16 -070010 * Authors: Ross Biro
Linus Torvalds1da177e2005-04-16 15:20:36 -070011 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Alan Cox, <gw4pts@gw4pts.ampr.org>
13 *
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090014 * Fixes:
Linus Torvalds1da177e2005-04-16 15:20:36 -070015 * Alan Cox : verify_area() now used correctly
16 * Alan Cox : new skbuff lists, look ma no backlogs!
17 * Alan Cox : tidied skbuff lists.
18 * Alan Cox : Now uses generic datagram routines I
19 * added. Also fixed the peek/read crash
20 * from all old Linux datagram code.
21 * Alan Cox : Uses the improved datagram code.
22 * Alan Cox : Added NULL's for socket options.
23 * Alan Cox : Re-commented the code.
24 * Alan Cox : Use new kernel side addressing
25 * Rob Janssen : Correct MTU usage.
26 * Dave Platt : Counter leaks caused by incorrect
27 * interrupt locking and some slightly
28 * dubious gcc output. Can you read
29 * compiler: it said _VOLATILE_
30 * Richard Kooijman : Timestamp fixes.
31 * Alan Cox : New buffers. Use sk->mac.raw.
32 * Alan Cox : sendmsg/recvmsg support.
33 * Alan Cox : Protocol setting support
34 * Alexey Kuznetsov : Untied from IPv4 stack.
35 * Cyrus Durgin : Fixed kerneld for kmod.
36 * Michal Ostrowski : Module initialization cleanup.
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090037 * Ulises Alonso : Frame number limit removal and
Linus Torvalds1da177e2005-04-16 15:20:36 -070038 * packet_set_ring memory leak.
Eric W. Biederman0fb375f2005-09-21 00:11:37 -070039 * Eric Biederman : Allow for > 8 byte hardware addresses.
40 * The convention is that longer addresses
41 * will simply extend the hardware address
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090042 * byte arrays at the end of sockaddr_ll
Eric W. Biederman0fb375f2005-09-21 00:11:37 -070043 * and packet_mreq.
Linus Torvalds1da177e2005-04-16 15:20:36 -070044 *
45 * This program is free software; you can redistribute it and/or
46 * modify it under the terms of the GNU General Public License
47 * as published by the Free Software Foundation; either version
48 * 2 of the License, or (at your option) any later version.
49 *
50 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090051
Linus Torvalds1da177e2005-04-16 15:20:36 -070052#include <linux/types.h>
53#include <linux/sched.h>
54#include <linux/mm.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080055#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070056#include <linux/fcntl.h>
57#include <linux/socket.h>
58#include <linux/in.h>
59#include <linux/inet.h>
60#include <linux/netdevice.h>
61#include <linux/if_packet.h>
62#include <linux/wireless.h>
Herbert Xuffbc6112007-02-04 23:33:10 -080063#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070064#include <linux/kmod.h>
65#include <net/ip.h>
66#include <net/protocol.h>
67#include <linux/skbuff.h>
68#include <net/sock.h>
69#include <linux/errno.h>
70#include <linux/timer.h>
71#include <asm/system.h>
72#include <asm/uaccess.h>
73#include <asm/ioctls.h>
74#include <asm/page.h>
Al Viroa1f8e7f72006-10-19 16:08:53 -040075#include <asm/cacheflush.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070076#include <asm/io.h>
77#include <linux/proc_fs.h>
78#include <linux/seq_file.h>
79#include <linux/poll.h>
80#include <linux/module.h>
81#include <linux/init.h>
82
83#ifdef CONFIG_INET
84#include <net/inet_common.h>
85#endif
86
87#define CONFIG_SOCK_PACKET 1
88
/*
   Proposed replacement for SIOC{ADD,DEL}MULTI and
   IFF_PROMISC, IFF_ALLMULTI flags.

   It is more expensive, but I believe
   it is the really correct solution: reentrant, safe and fault tolerant.

   IFF_PROMISC/IFF_ALLMULTI/SIOC{ADD/DEL}MULTI are faked by keeping
   a reference count and a global flag, so that the real status is
   (gflag|(count != 0)), so that we can use the obsolete faulty interface
   without harming clever users.
 */
101#define CONFIG_PACKET_MULTICAST 1
102
/*
   Assumptions:
   - if device has no dev->hard_header routine, it adds and removes ll header
     inside itself. In this case ll header is invisible outside of device,
     but higher levels still should reserve dev->hard_header_len.
     Some devices are clever enough to reallocate skb, when header
     will not fit to reserved space (tunnel), other ones are silly
     (PPP).
   - packet socket receives packets with pulled ll header,
     so that SOCK_RAW should push it back.

On receive:
-----------

Incoming, dev->hard_header!=NULL
   mac.raw -> ll header
   data    -> data

Outgoing, dev->hard_header!=NULL
   mac.raw -> ll header
   data    -> ll header

Incoming, dev->hard_header==NULL
   mac.raw -> UNKNOWN position. It is very likely, that it points to ll header.
              PPP does this, which is wrong, because it introduces asymmetry
              between rx and tx paths.
   data    -> data

Outgoing, dev->hard_header==NULL
   mac.raw -> data. ll header is still not built!
   data    -> data

Resume
   If dev->hard_header==NULL we are unlikely to restore sensible ll header.


On transmit:
------------

dev->hard_header != NULL
   mac.raw -> ll header
   data    -> ll header

dev->hard_header == NULL (ll header is added by device, we cannot control it)
   mac.raw -> data
   data    -> data

   We should set nh.raw on output to correct position,
   packet classifier depends on it.
 */
153
154/* List of all packet sockets. */
155static HLIST_HEAD(packet_sklist);
156static DEFINE_RWLOCK(packet_sklist_lock);
157
158static atomic_t packet_socks_nr;
159
160
161/* Private packet socket structures. */
162
163#ifdef CONFIG_PACKET_MULTICAST
164struct packet_mclist
165{
166 struct packet_mclist *next;
167 int ifindex;
168 int count;
169 unsigned short type;
170 unsigned short alen;
Eric W. Biederman0fb375f2005-09-21 00:11:37 -0700171 unsigned char addr[MAX_ADDR_LEN];
172};
173/* identical to struct packet_mreq except it has
174 * a longer address field.
175 */
176struct packet_mreq_max
177{
178 int mr_ifindex;
179 unsigned short mr_type;
180 unsigned short mr_alen;
181 unsigned char mr_address[MAX_ADDR_LEN];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700182};
183#endif
184#ifdef CONFIG_PACKET_MMAP
185static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing);
186#endif
187
188static void packet_flush_mclist(struct sock *sk);
189
190struct packet_sock {
191 /* struct sock has to be the first member of packet_sock */
192 struct sock sk;
193 struct tpacket_stats stats;
194#ifdef CONFIG_PACKET_MMAP
195 char * *pg_vec;
196 unsigned int head;
197 unsigned int frames_per_block;
198 unsigned int frame_size;
199 unsigned int frame_max;
200 int copy_thresh;
201#endif
202 struct packet_type prot_hook;
203 spinlock_t bind_lock;
Herbert Xu8dc41942007-02-04 23:31:32 -0800204 unsigned int running:1, /* prot_hook is attached*/
205 auxdata:1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700206 int ifindex; /* bound device */
Al Viro0e11c912006-11-08 00:26:29 -0800207 __be16 num;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700208#ifdef CONFIG_PACKET_MULTICAST
209 struct packet_mclist *mclist;
210#endif
211#ifdef CONFIG_PACKET_MMAP
212 atomic_t mapped;
213 unsigned int pg_vec_order;
214 unsigned int pg_vec_pages;
215 unsigned int pg_vec_len;
216#endif
217};
218
Herbert Xuffbc6112007-02-04 23:33:10 -0800219struct packet_skb_cb {
220 unsigned int origlen;
221 union {
222 struct sockaddr_pkt pkt;
223 struct sockaddr_ll ll;
224 } sa;
225};
226
227#define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb))
Herbert Xu8dc41942007-02-04 23:31:32 -0800228
Linus Torvalds1da177e2005-04-16 15:20:36 -0700229#ifdef CONFIG_PACKET_MMAP
230
231static inline char *packet_lookup_frame(struct packet_sock *po, unsigned int position)
232{
233 unsigned int pg_vec_pos, frame_offset;
234 char *frame;
235
236 pg_vec_pos = position / po->frames_per_block;
237 frame_offset = position % po->frames_per_block;
238
239 frame = po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size);
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900240
Linus Torvalds1da177e2005-04-16 15:20:36 -0700241 return frame;
242}
243#endif
244
245static inline struct packet_sock *pkt_sk(struct sock *sk)
246{
247 return (struct packet_sock *)sk;
248}
249
250static void packet_sock_destruct(struct sock *sk)
251{
252 BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
253 BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
254
255 if (!sock_flag(sk, SOCK_DEAD)) {
256 printk("Attempt to release alive packet socket: %p\n", sk);
257 return;
258 }
259
260 atomic_dec(&packet_socks_nr);
261#ifdef PACKET_REFCNT_DEBUG
262 printk(KERN_DEBUG "PACKET socket %p is free, %d are alive\n", sk, atomic_read(&packet_socks_nr));
263#endif
264}
265
266
Eric Dumazet90ddc4f2005-12-22 12:49:22 -0800267static const struct proto_ops packet_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700268
269#ifdef CONFIG_SOCK_PACKET
Eric Dumazet90ddc4f2005-12-22 12:49:22 -0800270static const struct proto_ops packet_ops_spkt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700271
David S. Millerf2ccd8f2005-08-09 19:34:12 -0700272static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700273{
274 struct sock *sk;
275 struct sockaddr_pkt *spkt;
276
277 /*
278 * When we registered the protocol we saved the socket in the data
279 * field for just this event.
280 */
281
282 sk = pt->af_packet_priv;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900283
Linus Torvalds1da177e2005-04-16 15:20:36 -0700284 /*
285 * Yank back the headers [hope the device set this
286 * right or kerboom...]
287 *
288 * Incoming packets have ll header pulled,
289 * push it back.
290 *
291 * For outgoing ones skb->data == skb->mac.raw
292 * so that this procedure is noop.
293 */
294
295 if (skb->pkt_type == PACKET_LOOPBACK)
296 goto out;
297
298 if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
299 goto oom;
300
301 /* drop any routing info */
302 dst_release(skb->dst);
303 skb->dst = NULL;
304
Phil Oester84531c22005-07-12 11:57:52 -0700305 /* drop conntrack reference */
306 nf_reset(skb);
307
Herbert Xuffbc6112007-02-04 23:33:10 -0800308 spkt = &PACKET_SKB_CB(skb)->sa.pkt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700309
310 skb_push(skb, skb->data-skb->mac.raw);
311
312 /*
313 * The SOCK_PACKET socket receives _all_ frames.
314 */
315
316 spkt->spkt_family = dev->type;
317 strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
318 spkt->spkt_protocol = skb->protocol;
319
320 /*
321 * Charge the memory to the socket. This is done specifically
322 * to prevent sockets using all the memory up.
323 */
324
325 if (sock_queue_rcv_skb(sk,skb) == 0)
326 return 0;
327
328out:
329 kfree_skb(skb);
330oom:
331 return 0;
332}
333
334
335/*
336 * Output a raw packet to a device layer. This bypasses all the other
337 * protocol layers and you must therefore supply it with a complete frame
338 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900339
Linus Torvalds1da177e2005-04-16 15:20:36 -0700340static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
341 struct msghdr *msg, size_t len)
342{
343 struct sock *sk = sock->sk;
344 struct sockaddr_pkt *saddr=(struct sockaddr_pkt *)msg->msg_name;
345 struct sk_buff *skb;
346 struct net_device *dev;
Al Viro0e11c912006-11-08 00:26:29 -0800347 __be16 proto=0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700348 int err;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900349
Linus Torvalds1da177e2005-04-16 15:20:36 -0700350 /*
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900351 * Get and verify the address.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700352 */
353
354 if (saddr)
355 {
356 if (msg->msg_namelen < sizeof(struct sockaddr))
357 return(-EINVAL);
358 if (msg->msg_namelen==sizeof(struct sockaddr_pkt))
359 proto=saddr->spkt_protocol;
360 }
361 else
362 return(-ENOTCONN); /* SOCK_PACKET must be sent giving an address */
363
364 /*
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900365 * Find the device first to size check it
Linus Torvalds1da177e2005-04-16 15:20:36 -0700366 */
367
368 saddr->spkt_device[13] = 0;
369 dev = dev_get_by_name(saddr->spkt_device);
370 err = -ENODEV;
371 if (dev == NULL)
372 goto out_unlock;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900373
David S. Millerd5e76b02007-01-25 19:30:36 -0800374 err = -ENETDOWN;
375 if (!(dev->flags & IFF_UP))
376 goto out_unlock;
377
Linus Torvalds1da177e2005-04-16 15:20:36 -0700378 /*
379 * You may not queue a frame bigger than the mtu. This is the lowest level
380 * raw protocol and you must do your own fragmentation at this level.
381 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900382
Linus Torvalds1da177e2005-04-16 15:20:36 -0700383 err = -EMSGSIZE;
Kris Katterjohn8ae55f02006-01-23 16:28:02 -0800384 if (len > dev->mtu + dev->hard_header_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700385 goto out_unlock;
386
387 err = -ENOBUFS;
388 skb = sock_wmalloc(sk, len + LL_RESERVED_SPACE(dev), 0, GFP_KERNEL);
389
390 /*
391 * If the write buffer is full, then tough. At this level the user gets to
392 * deal with the problem - do your own algorithmic backoffs. That's far
393 * more flexible.
394 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900395
396 if (skb == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700397 goto out_unlock;
398
399 /*
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900400 * Fill it in
Linus Torvalds1da177e2005-04-16 15:20:36 -0700401 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900402
Linus Torvalds1da177e2005-04-16 15:20:36 -0700403 /* FIXME: Save some space for broken drivers that write a
404 * hard header at transmission time by themselves. PPP is the
405 * notable one here. This should really be fixed at the driver level.
406 */
407 skb_reserve(skb, LL_RESERVED_SPACE(dev));
408 skb->nh.raw = skb->data;
409
410 /* Try to align data part correctly */
411 if (dev->hard_header) {
412 skb->data -= dev->hard_header_len;
413 skb->tail -= dev->hard_header_len;
414 if (len < dev->hard_header_len)
415 skb->nh.raw = skb->data;
416 }
417
418 /* Returns -EFAULT on error */
419 err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
420 skb->protocol = proto;
421 skb->dev = dev;
422 skb->priority = sk->sk_priority;
423 if (err)
424 goto out_free;
425
Linus Torvalds1da177e2005-04-16 15:20:36 -0700426 /*
427 * Now send it
428 */
429
430 dev_queue_xmit(skb);
431 dev_put(dev);
432 return(len);
433
434out_free:
435 kfree_skb(skb);
436out_unlock:
437 if (dev)
438 dev_put(dev);
439 return err;
440}
441#endif
442
David S. Millerdbcb5852007-01-24 15:21:02 -0800443static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
444 unsigned int res)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700445{
446 struct sk_filter *filter;
447
Dmitry Mishinfda9ef52006-08-31 15:28:39 -0700448 rcu_read_lock_bh();
449 filter = rcu_dereference(sk->sk_filter);
David S. Millerdbcb5852007-01-24 15:21:02 -0800450 if (filter != NULL)
451 res = sk_run_filter(skb, filter->insns, filter->len);
Dmitry Mishinfda9ef52006-08-31 15:28:39 -0700452 rcu_read_unlock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700453
David S. Millerdbcb5852007-01-24 15:21:02 -0800454 return res;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700455}
456
457/*
458 This function makes lazy skb cloning in hope that most of packets
459 are discarded by BPF.
460
461 Note tricky part: we DO mangle shared skb! skb->data, skb->len
462 and skb->cb are mangled. It works because (and until) packets
463 falling here are owned by current CPU. Output packets are cloned
464 by dev_queue_xmit_nit(), input packets are processed by net_bh
465 sequencially, so that if we return skb to original state on exit,
466 we will not harm anyone.
467 */
468
David S. Millerf2ccd8f2005-08-09 19:34:12 -0700469static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700470{
471 struct sock *sk;
472 struct sockaddr_ll *sll;
473 struct packet_sock *po;
474 u8 * skb_head = skb->data;
475 int skb_len = skb->len;
David S. Millerdbcb5852007-01-24 15:21:02 -0800476 unsigned int snaplen, res;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700477
478 if (skb->pkt_type == PACKET_LOOPBACK)
479 goto drop;
480
481 sk = pt->af_packet_priv;
482 po = pkt_sk(sk);
483
484 skb->dev = dev;
485
486 if (dev->hard_header) {
487 /* The device has an explicit notion of ll header,
488 exported to higher levels.
489
490 Otherwise, the device hides datails of it frame
491 structure, so that corresponding packet head
492 never delivered to user.
493 */
494 if (sk->sk_type != SOCK_DGRAM)
495 skb_push(skb, skb->data - skb->mac.raw);
496 else if (skb->pkt_type == PACKET_OUTGOING) {
497 /* Special case: outgoing packets have ll header at head */
498 skb_pull(skb, skb->nh.raw - skb->data);
499 }
500 }
501
502 snaplen = skb->len;
503
David S. Millerdbcb5852007-01-24 15:21:02 -0800504 res = run_filter(skb, sk, snaplen);
505 if (!res)
Dmitry Mishinfda9ef52006-08-31 15:28:39 -0700506 goto drop_n_restore;
David S. Millerdbcb5852007-01-24 15:21:02 -0800507 if (snaplen > res)
508 snaplen = res;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700509
510 if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
511 (unsigned)sk->sk_rcvbuf)
512 goto drop_n_acct;
513
514 if (skb_shared(skb)) {
515 struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
516 if (nskb == NULL)
517 goto drop_n_acct;
518
519 if (skb_head != skb->data) {
520 skb->data = skb_head;
521 skb->len = skb_len;
522 }
523 kfree_skb(skb);
524 skb = nskb;
525 }
526
Herbert Xuffbc6112007-02-04 23:33:10 -0800527 BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
528 sizeof(skb->cb));
529
530 sll = &PACKET_SKB_CB(skb)->sa.ll;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700531 sll->sll_family = AF_PACKET;
532 sll->sll_hatype = dev->type;
533 sll->sll_protocol = skb->protocol;
534 sll->sll_pkttype = skb->pkt_type;
535 sll->sll_ifindex = dev->ifindex;
536 sll->sll_halen = 0;
537
538 if (dev->hard_header_parse)
539 sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
540
Herbert Xuffbc6112007-02-04 23:33:10 -0800541 PACKET_SKB_CB(skb)->origlen = skb->len;
Herbert Xu8dc41942007-02-04 23:31:32 -0800542
Linus Torvalds1da177e2005-04-16 15:20:36 -0700543 if (pskb_trim(skb, snaplen))
544 goto drop_n_acct;
545
546 skb_set_owner_r(skb, sk);
547 skb->dev = NULL;
548 dst_release(skb->dst);
549 skb->dst = NULL;
550
Phil Oester84531c22005-07-12 11:57:52 -0700551 /* drop conntrack reference */
552 nf_reset(skb);
553
Linus Torvalds1da177e2005-04-16 15:20:36 -0700554 spin_lock(&sk->sk_receive_queue.lock);
555 po->stats.tp_packets++;
556 __skb_queue_tail(&sk->sk_receive_queue, skb);
557 spin_unlock(&sk->sk_receive_queue.lock);
558 sk->sk_data_ready(sk, skb->len);
559 return 0;
560
561drop_n_acct:
562 spin_lock(&sk->sk_receive_queue.lock);
563 po->stats.tp_drops++;
564 spin_unlock(&sk->sk_receive_queue.lock);
565
566drop_n_restore:
567 if (skb_head != skb->data && skb_shared(skb)) {
568 skb->data = skb_head;
569 skb->len = skb_len;
570 }
571drop:
572 kfree_skb(skb);
573 return 0;
574}
575
576#ifdef CONFIG_PACKET_MMAP
David S. Millerf2ccd8f2005-08-09 19:34:12 -0700577static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700578{
579 struct sock *sk;
580 struct packet_sock *po;
581 struct sockaddr_ll *sll;
582 struct tpacket_hdr *h;
583 u8 * skb_head = skb->data;
584 int skb_len = skb->len;
David S. Millerdbcb5852007-01-24 15:21:02 -0800585 unsigned int snaplen, res;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700586 unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
587 unsigned short macoff, netoff;
588 struct sk_buff *copy_skb = NULL;
589
590 if (skb->pkt_type == PACKET_LOOPBACK)
591 goto drop;
592
593 sk = pt->af_packet_priv;
594 po = pkt_sk(sk);
595
596 if (dev->hard_header) {
597 if (sk->sk_type != SOCK_DGRAM)
598 skb_push(skb, skb->data - skb->mac.raw);
599 else if (skb->pkt_type == PACKET_OUTGOING) {
600 /* Special case: outgoing packets have ll header at head */
601 skb_pull(skb, skb->nh.raw - skb->data);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700602 }
603 }
604
Herbert Xu8dc41942007-02-04 23:31:32 -0800605 if (skb->ip_summed == CHECKSUM_PARTIAL)
606 status |= TP_STATUS_CSUMNOTREADY;
607
Linus Torvalds1da177e2005-04-16 15:20:36 -0700608 snaplen = skb->len;
609
David S. Millerdbcb5852007-01-24 15:21:02 -0800610 res = run_filter(skb, sk, snaplen);
611 if (!res)
Dmitry Mishinfda9ef52006-08-31 15:28:39 -0700612 goto drop_n_restore;
David S. Millerdbcb5852007-01-24 15:21:02 -0800613 if (snaplen > res)
614 snaplen = res;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700615
616 if (sk->sk_type == SOCK_DGRAM) {
617 macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
618 } else {
619 unsigned maclen = skb->nh.raw - skb->data;
620 netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen));
621 macoff = netoff - maclen;
622 }
623
624 if (macoff + snaplen > po->frame_size) {
625 if (po->copy_thresh &&
626 atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
627 (unsigned)sk->sk_rcvbuf) {
628 if (skb_shared(skb)) {
629 copy_skb = skb_clone(skb, GFP_ATOMIC);
630 } else {
631 copy_skb = skb_get(skb);
632 skb_head = skb->data;
633 }
634 if (copy_skb)
635 skb_set_owner_r(copy_skb, sk);
636 }
637 snaplen = po->frame_size - macoff;
638 if ((int)snaplen < 0)
639 snaplen = 0;
640 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700641
642 spin_lock(&sk->sk_receive_queue.lock);
643 h = (struct tpacket_hdr *)packet_lookup_frame(po, po->head);
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900644
Linus Torvalds1da177e2005-04-16 15:20:36 -0700645 if (h->tp_status)
646 goto ring_is_full;
647 po->head = po->head != po->frame_max ? po->head+1 : 0;
648 po->stats.tp_packets++;
649 if (copy_skb) {
650 status |= TP_STATUS_COPY;
651 __skb_queue_tail(&sk->sk_receive_queue, copy_skb);
652 }
653 if (!po->stats.tp_drops)
654 status &= ~TP_STATUS_LOSING;
655 spin_unlock(&sk->sk_receive_queue.lock);
656
Patrick McHardycbe21d82006-09-17 23:59:57 -0700657 skb_copy_bits(skb, 0, (u8*)h + macoff, snaplen);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700658
659 h->tp_len = skb->len;
660 h->tp_snaplen = snaplen;
661 h->tp_mac = macoff;
662 h->tp_net = netoff;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900663 if (skb->tstamp.off_sec == 0) {
Patrick McHardya61bbcf2005-08-14 17:24:31 -0700664 __net_timestamp(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700665 sock_enable_timestamp(sk);
666 }
Herbert Xu325ed822005-10-03 13:57:23 -0700667 h->tp_sec = skb->tstamp.off_sec;
668 h->tp_usec = skb->tstamp.off_usec;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700669
670 sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
671 sll->sll_halen = 0;
672 if (dev->hard_header_parse)
673 sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
674 sll->sll_family = AF_PACKET;
675 sll->sll_hatype = dev->type;
676 sll->sll_protocol = skb->protocol;
677 sll->sll_pkttype = skb->pkt_type;
678 sll->sll_ifindex = dev->ifindex;
679
680 h->tp_status = status;
Ralf Baechlee16aa202006-12-07 00:11:33 -0800681 smp_mb();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700682
683 {
684 struct page *p_start, *p_end;
685 u8 *h_end = (u8 *)h + macoff + snaplen - 1;
686
687 p_start = virt_to_page(h);
688 p_end = virt_to_page(h_end);
689 while (p_start <= p_end) {
690 flush_dcache_page(p_start);
691 p_start++;
692 }
693 }
694
695 sk->sk_data_ready(sk, 0);
696
697drop_n_restore:
698 if (skb_head != skb->data && skb_shared(skb)) {
699 skb->data = skb_head;
700 skb->len = skb_len;
701 }
702drop:
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900703 kfree_skb(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700704 return 0;
705
706ring_is_full:
707 po->stats.tp_drops++;
708 spin_unlock(&sk->sk_receive_queue.lock);
709
710 sk->sk_data_ready(sk, 0);
711 if (copy_skb)
712 kfree_skb(copy_skb);
713 goto drop_n_restore;
714}
715
716#endif
717
718
719static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
720 struct msghdr *msg, size_t len)
721{
722 struct sock *sk = sock->sk;
723 struct sockaddr_ll *saddr=(struct sockaddr_ll *)msg->msg_name;
724 struct sk_buff *skb;
725 struct net_device *dev;
Al Viro0e11c912006-11-08 00:26:29 -0800726 __be16 proto;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700727 unsigned char *addr;
728 int ifindex, err, reserve = 0;
729
730 /*
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900731 * Get and verify the address.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700732 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900733
Linus Torvalds1da177e2005-04-16 15:20:36 -0700734 if (saddr == NULL) {
735 struct packet_sock *po = pkt_sk(sk);
736
737 ifindex = po->ifindex;
738 proto = po->num;
739 addr = NULL;
740 } else {
741 err = -EINVAL;
742 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
743 goto out;
Eric W. Biederman0fb375f2005-09-21 00:11:37 -0700744 if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
745 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700746 ifindex = saddr->sll_ifindex;
747 proto = saddr->sll_protocol;
748 addr = saddr->sll_addr;
749 }
750
751
752 dev = dev_get_by_index(ifindex);
753 err = -ENXIO;
754 if (dev == NULL)
755 goto out_unlock;
756 if (sock->type == SOCK_RAW)
757 reserve = dev->hard_header_len;
758
David S. Millerd5e76b02007-01-25 19:30:36 -0800759 err = -ENETDOWN;
760 if (!(dev->flags & IFF_UP))
761 goto out_unlock;
762
Linus Torvalds1da177e2005-04-16 15:20:36 -0700763 err = -EMSGSIZE;
764 if (len > dev->mtu+reserve)
765 goto out_unlock;
766
767 skb = sock_alloc_send_skb(sk, len + LL_RESERVED_SPACE(dev),
768 msg->msg_flags & MSG_DONTWAIT, &err);
769 if (skb==NULL)
770 goto out_unlock;
771
772 skb_reserve(skb, LL_RESERVED_SPACE(dev));
773 skb->nh.raw = skb->data;
774
775 if (dev->hard_header) {
776 int res;
777 err = -EINVAL;
778 res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len);
779 if (sock->type != SOCK_DGRAM) {
780 skb->tail = skb->data;
781 skb->len = 0;
782 } else if (res < 0)
783 goto out_free;
784 }
785
786 /* Returns -EFAULT on error */
787 err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
788 if (err)
789 goto out_free;
790
791 skb->protocol = proto;
792 skb->dev = dev;
793 skb->priority = sk->sk_priority;
794
Linus Torvalds1da177e2005-04-16 15:20:36 -0700795 /*
796 * Now send it
797 */
798
799 err = dev_queue_xmit(skb);
800 if (err > 0 && (err = net_xmit_errno(err)) != 0)
801 goto out_unlock;
802
803 dev_put(dev);
804
805 return(len);
806
807out_free:
808 kfree_skb(skb);
809out_unlock:
810 if (dev)
811 dev_put(dev);
812out:
813 return err;
814}
815
816/*
817 * Close a PACKET socket. This is fairly simple. We immediately go
818 * to 'closed' state and remove our protocol entry in the device list.
819 */
820
821static int packet_release(struct socket *sock)
822{
823 struct sock *sk = sock->sk;
824 struct packet_sock *po;
825
826 if (!sk)
827 return 0;
828
829 po = pkt_sk(sk);
830
831 write_lock_bh(&packet_sklist_lock);
832 sk_del_node_init(sk);
833 write_unlock_bh(&packet_sklist_lock);
834
835 /*
836 * Unhook packet receive handler.
837 */
838
839 if (po->running) {
840 /*
841 * Remove the protocol hook
842 */
843 dev_remove_pack(&po->prot_hook);
844 po->running = 0;
845 po->num = 0;
846 __sock_put(sk);
847 }
848
849#ifdef CONFIG_PACKET_MULTICAST
850 packet_flush_mclist(sk);
851#endif
852
853#ifdef CONFIG_PACKET_MMAP
854 if (po->pg_vec) {
855 struct tpacket_req req;
856 memset(&req, 0, sizeof(req));
857 packet_set_ring(sk, &req, 1);
858 }
859#endif
860
861 /*
862 * Now the socket is dead. No more input will appear.
863 */
864
865 sock_orphan(sk);
866 sock->sk = NULL;
867
868 /* Purge queues */
869
870 skb_queue_purge(&sk->sk_receive_queue);
871
872 sock_put(sk);
873 return 0;
874}
875
876/*
877 * Attach a packet hook.
878 */
879
Al Viro0e11c912006-11-08 00:26:29 -0800880static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700881{
882 struct packet_sock *po = pkt_sk(sk);
883 /*
884 * Detach an existing hook if present.
885 */
886
887 lock_sock(sk);
888
889 spin_lock(&po->bind_lock);
890 if (po->running) {
891 __sock_put(sk);
892 po->running = 0;
893 po->num = 0;
894 spin_unlock(&po->bind_lock);
895 dev_remove_pack(&po->prot_hook);
896 spin_lock(&po->bind_lock);
897 }
898
899 po->num = protocol;
900 po->prot_hook.type = protocol;
901 po->prot_hook.dev = dev;
902
903 po->ifindex = dev ? dev->ifindex : 0;
904
905 if (protocol == 0)
906 goto out_unlock;
907
908 if (dev) {
909 if (dev->flags&IFF_UP) {
910 dev_add_pack(&po->prot_hook);
911 sock_hold(sk);
912 po->running = 1;
913 } else {
914 sk->sk_err = ENETDOWN;
915 if (!sock_flag(sk, SOCK_DEAD))
916 sk->sk_error_report(sk);
917 }
918 } else {
919 dev_add_pack(&po->prot_hook);
920 sock_hold(sk);
921 po->running = 1;
922 }
923
924out_unlock:
925 spin_unlock(&po->bind_lock);
926 release_sock(sk);
927 return 0;
928}
929
930/*
931 * Bind a packet socket to a device
932 */
933
934#ifdef CONFIG_SOCK_PACKET
935
936static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len)
937{
938 struct sock *sk=sock->sk;
939 char name[15];
940 struct net_device *dev;
941 int err = -ENODEV;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900942
Linus Torvalds1da177e2005-04-16 15:20:36 -0700943 /*
944 * Check legality
945 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900946
Kris Katterjohn8ae55f02006-01-23 16:28:02 -0800947 if (addr_len != sizeof(struct sockaddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700948 return -EINVAL;
949 strlcpy(name,uaddr->sa_data,sizeof(name));
950
951 dev = dev_get_by_name(name);
952 if (dev) {
953 err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
954 dev_put(dev);
955 }
956 return err;
957}
958#endif
959
960static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
961{
962 struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
963 struct sock *sk=sock->sk;
964 struct net_device *dev = NULL;
965 int err;
966
967
968 /*
969 * Check legality
970 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900971
Linus Torvalds1da177e2005-04-16 15:20:36 -0700972 if (addr_len < sizeof(struct sockaddr_ll))
973 return -EINVAL;
974 if (sll->sll_family != AF_PACKET)
975 return -EINVAL;
976
977 if (sll->sll_ifindex) {
978 err = -ENODEV;
979 dev = dev_get_by_index(sll->sll_ifindex);
980 if (dev == NULL)
981 goto out;
982 }
983 err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
984 if (dev)
985 dev_put(dev);
986
987out:
988 return err;
989}
990
991static struct proto packet_proto = {
992 .name = "PACKET",
993 .owner = THIS_MODULE,
994 .obj_size = sizeof(struct packet_sock),
995};
996
997/*
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900998 * Create a packet of type SOCK_PACKET.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700999 */
1000
1001static int packet_create(struct socket *sock, int protocol)
1002{
1003 struct sock *sk;
1004 struct packet_sock *po;
Al Viro0e11c912006-11-08 00:26:29 -08001005 __be16 proto = (__force __be16)protocol; /* weird, but documented */
Linus Torvalds1da177e2005-04-16 15:20:36 -07001006 int err;
1007
1008 if (!capable(CAP_NET_RAW))
1009 return -EPERM;
1010 if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW
1011#ifdef CONFIG_SOCK_PACKET
1012 && sock->type != SOCK_PACKET
1013#endif
1014 )
1015 return -ESOCKTNOSUPPORT;
1016
1017 sock->state = SS_UNCONNECTED;
1018
1019 err = -ENOBUFS;
1020 sk = sk_alloc(PF_PACKET, GFP_KERNEL, &packet_proto, 1);
1021 if (sk == NULL)
1022 goto out;
1023
1024 sock->ops = &packet_ops;
1025#ifdef CONFIG_SOCK_PACKET
1026 if (sock->type == SOCK_PACKET)
1027 sock->ops = &packet_ops_spkt;
1028#endif
1029 sock_init_data(sock, sk);
1030
1031 po = pkt_sk(sk);
1032 sk->sk_family = PF_PACKET;
Al Viro0e11c912006-11-08 00:26:29 -08001033 po->num = proto;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001034
1035 sk->sk_destruct = packet_sock_destruct;
1036 atomic_inc(&packet_socks_nr);
1037
1038 /*
1039 * Attach a protocol block
1040 */
1041
1042 spin_lock_init(&po->bind_lock);
1043 po->prot_hook.func = packet_rcv;
1044#ifdef CONFIG_SOCK_PACKET
1045 if (sock->type == SOCK_PACKET)
1046 po->prot_hook.func = packet_rcv_spkt;
1047#endif
1048 po->prot_hook.af_packet_priv = sk;
1049
Al Viro0e11c912006-11-08 00:26:29 -08001050 if (proto) {
1051 po->prot_hook.type = proto;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001052 dev_add_pack(&po->prot_hook);
1053 sock_hold(sk);
1054 po->running = 1;
1055 }
1056
1057 write_lock_bh(&packet_sklist_lock);
1058 sk_add_node(sk, &packet_sklist);
1059 write_unlock_bh(&packet_sklist_lock);
1060 return(0);
1061out:
1062 return err;
1063}
1064
1065/*
1066 * Pull a packet from our receive queue and hand it to the user.
1067 * If necessary we block.
1068 */
1069
1070static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
1071 struct msghdr *msg, size_t len, int flags)
1072{
1073 struct sock *sk = sock->sk;
1074 struct sk_buff *skb;
1075 int copied, err;
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001076 struct sockaddr_ll *sll;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001077
1078 err = -EINVAL;
1079 if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
1080 goto out;
1081
1082#if 0
1083 /* What error should we return now? EUNATTACH? */
1084 if (pkt_sk(sk)->ifindex < 0)
1085 return -ENODEV;
1086#endif
1087
1088 /*
Linus Torvalds1da177e2005-04-16 15:20:36 -07001089 * Call the generic datagram receiver. This handles all sorts
1090 * of horrible races and re-entrancy so we can forget about it
1091 * in the protocol layers.
1092 *
1093 * Now it will return ENETDOWN, if device have just gone down,
1094 * but then it will block.
1095 */
1096
1097 skb=skb_recv_datagram(sk,flags,flags&MSG_DONTWAIT,&err);
1098
1099 /*
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001100 * An error occurred so return it. Because skb_recv_datagram()
Linus Torvalds1da177e2005-04-16 15:20:36 -07001101 * handles the blocking we don't see and worry about blocking
1102 * retries.
1103 */
1104
Kris Katterjohn8ae55f02006-01-23 16:28:02 -08001105 if (skb == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001106 goto out;
1107
1108 /*
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001109 * If the address length field is there to be filled in, we fill
1110 * it in now.
1111 */
1112
Herbert Xuffbc6112007-02-04 23:33:10 -08001113 sll = &PACKET_SKB_CB(skb)->sa.ll;
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001114 if (sock->type == SOCK_PACKET)
1115 msg->msg_namelen = sizeof(struct sockaddr_pkt);
1116 else
1117 msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);
1118
1119 /*
Linus Torvalds1da177e2005-04-16 15:20:36 -07001120 * You lose any data beyond the buffer you gave. If it worries a
1121 * user program they can ask the device for its MTU anyway.
1122 */
1123
1124 copied = skb->len;
1125 if (copied > len)
1126 {
1127 copied=len;
1128 msg->msg_flags|=MSG_TRUNC;
1129 }
1130
1131 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
1132 if (err)
1133 goto out_free;
1134
1135 sock_recv_timestamp(msg, sk, skb);
1136
1137 if (msg->msg_name)
Herbert Xuffbc6112007-02-04 23:33:10 -08001138 memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
1139 msg->msg_namelen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001140
Herbert Xu8dc41942007-02-04 23:31:32 -08001141 if (pkt_sk(sk)->auxdata) {
Herbert Xuffbc6112007-02-04 23:33:10 -08001142 struct tpacket_auxdata aux;
1143
1144 aux.tp_status = TP_STATUS_USER;
1145 if (skb->ip_summed == CHECKSUM_PARTIAL)
1146 aux.tp_status |= TP_STATUS_CSUMNOTREADY;
1147 aux.tp_len = PACKET_SKB_CB(skb)->origlen;
1148 aux.tp_snaplen = skb->len;
1149 aux.tp_mac = 0;
1150 aux.tp_net = skb->nh.raw - skb->data;
1151
1152 put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
Herbert Xu8dc41942007-02-04 23:31:32 -08001153 }
1154
Linus Torvalds1da177e2005-04-16 15:20:36 -07001155 /*
1156 * Free or return the buffer as appropriate. Again this
1157 * hides all the races and re-entrancy issues from us.
1158 */
1159 err = (flags&MSG_TRUNC) ? skb->len : copied;
1160
1161out_free:
1162 skb_free_datagram(sk, skb);
1163out:
1164 return err;
1165}
1166
1167#ifdef CONFIG_SOCK_PACKET
1168static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
1169 int *uaddr_len, int peer)
1170{
1171 struct net_device *dev;
1172 struct sock *sk = sock->sk;
1173
1174 if (peer)
1175 return -EOPNOTSUPP;
1176
1177 uaddr->sa_family = AF_PACKET;
1178 dev = dev_get_by_index(pkt_sk(sk)->ifindex);
1179 if (dev) {
1180 strlcpy(uaddr->sa_data, dev->name, 15);
1181 dev_put(dev);
1182 } else
1183 memset(uaddr->sa_data, 0, 14);
1184 *uaddr_len = sizeof(*uaddr);
1185
1186 return 0;
1187}
1188#endif
1189
1190static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
1191 int *uaddr_len, int peer)
1192{
1193 struct net_device *dev;
1194 struct sock *sk = sock->sk;
1195 struct packet_sock *po = pkt_sk(sk);
1196 struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
1197
1198 if (peer)
1199 return -EOPNOTSUPP;
1200
1201 sll->sll_family = AF_PACKET;
1202 sll->sll_ifindex = po->ifindex;
1203 sll->sll_protocol = po->num;
1204 dev = dev_get_by_index(po->ifindex);
1205 if (dev) {
1206 sll->sll_hatype = dev->type;
1207 sll->sll_halen = dev->addr_len;
1208 memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
1209 dev_put(dev);
1210 } else {
1211 sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */
1212 sll->sll_halen = 0;
1213 }
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001214 *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001215
1216 return 0;
1217}
1218
1219#ifdef CONFIG_PACKET_MULTICAST
1220static void packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int what)
1221{
1222 switch (i->type) {
1223 case PACKET_MR_MULTICAST:
1224 if (what > 0)
1225 dev_mc_add(dev, i->addr, i->alen, 0);
1226 else
1227 dev_mc_delete(dev, i->addr, i->alen, 0);
1228 break;
1229 case PACKET_MR_PROMISC:
1230 dev_set_promiscuity(dev, what);
1231 break;
1232 case PACKET_MR_ALLMULTI:
1233 dev_set_allmulti(dev, what);
1234 break;
1235 default:;
1236 }
1237}
1238
1239static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
1240{
1241 for ( ; i; i=i->next) {
1242 if (i->ifindex == dev->ifindex)
1243 packet_dev_mc(dev, i, what);
1244 }
1245}
1246
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001247static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001248{
1249 struct packet_sock *po = pkt_sk(sk);
1250 struct packet_mclist *ml, *i;
1251 struct net_device *dev;
1252 int err;
1253
1254 rtnl_lock();
1255
1256 err = -ENODEV;
1257 dev = __dev_get_by_index(mreq->mr_ifindex);
1258 if (!dev)
1259 goto done;
1260
1261 err = -EINVAL;
1262 if (mreq->mr_alen > dev->addr_len)
1263 goto done;
1264
1265 err = -ENOBUFS;
Kris Katterjohn8b3a7002006-01-11 15:56:43 -08001266 i = kmalloc(sizeof(*i), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001267 if (i == NULL)
1268 goto done;
1269
1270 err = 0;
1271 for (ml = po->mclist; ml; ml = ml->next) {
1272 if (ml->ifindex == mreq->mr_ifindex &&
1273 ml->type == mreq->mr_type &&
1274 ml->alen == mreq->mr_alen &&
1275 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1276 ml->count++;
1277 /* Free the new element ... */
1278 kfree(i);
1279 goto done;
1280 }
1281 }
1282
1283 i->type = mreq->mr_type;
1284 i->ifindex = mreq->mr_ifindex;
1285 i->alen = mreq->mr_alen;
1286 memcpy(i->addr, mreq->mr_address, i->alen);
1287 i->count = 1;
1288 i->next = po->mclist;
1289 po->mclist = i;
1290 packet_dev_mc(dev, i, +1);
1291
1292done:
1293 rtnl_unlock();
1294 return err;
1295}
1296
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001297static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001298{
1299 struct packet_mclist *ml, **mlp;
1300
1301 rtnl_lock();
1302
1303 for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
1304 if (ml->ifindex == mreq->mr_ifindex &&
1305 ml->type == mreq->mr_type &&
1306 ml->alen == mreq->mr_alen &&
1307 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1308 if (--ml->count == 0) {
1309 struct net_device *dev;
1310 *mlp = ml->next;
1311 dev = dev_get_by_index(ml->ifindex);
1312 if (dev) {
1313 packet_dev_mc(dev, ml, -1);
1314 dev_put(dev);
1315 }
1316 kfree(ml);
1317 }
1318 rtnl_unlock();
1319 return 0;
1320 }
1321 }
1322 rtnl_unlock();
1323 return -EADDRNOTAVAIL;
1324}
1325
1326static void packet_flush_mclist(struct sock *sk)
1327{
1328 struct packet_sock *po = pkt_sk(sk);
1329 struct packet_mclist *ml;
1330
1331 if (!po->mclist)
1332 return;
1333
1334 rtnl_lock();
1335 while ((ml = po->mclist) != NULL) {
1336 struct net_device *dev;
1337
1338 po->mclist = ml->next;
1339 if ((dev = dev_get_by_index(ml->ifindex)) != NULL) {
1340 packet_dev_mc(dev, ml, -1);
1341 dev_put(dev);
1342 }
1343 kfree(ml);
1344 }
1345 rtnl_unlock();
1346}
1347#endif
1348
1349static int
1350packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
1351{
1352 struct sock *sk = sock->sk;
Herbert Xu8dc41942007-02-04 23:31:32 -08001353 struct packet_sock *po = pkt_sk(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001354 int ret;
1355
1356 if (level != SOL_PACKET)
1357 return -ENOPROTOOPT;
1358
1359 switch(optname) {
1360#ifdef CONFIG_PACKET_MULTICAST
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001361 case PACKET_ADD_MEMBERSHIP:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001362 case PACKET_DROP_MEMBERSHIP:
1363 {
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001364 struct packet_mreq_max mreq;
1365 int len = optlen;
1366 memset(&mreq, 0, sizeof(mreq));
1367 if (len < sizeof(struct packet_mreq))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001368 return -EINVAL;
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001369 if (len > sizeof(mreq))
1370 len = sizeof(mreq);
1371 if (copy_from_user(&mreq,optval,len))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001372 return -EFAULT;
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001373 if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
1374 return -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001375 if (optname == PACKET_ADD_MEMBERSHIP)
1376 ret = packet_mc_add(sk, &mreq);
1377 else
1378 ret = packet_mc_drop(sk, &mreq);
1379 return ret;
1380 }
1381#endif
1382#ifdef CONFIG_PACKET_MMAP
1383 case PACKET_RX_RING:
1384 {
1385 struct tpacket_req req;
1386
1387 if (optlen<sizeof(req))
1388 return -EINVAL;
1389 if (copy_from_user(&req,optval,sizeof(req)))
1390 return -EFAULT;
1391 return packet_set_ring(sk, &req, 0);
1392 }
1393 case PACKET_COPY_THRESH:
1394 {
1395 int val;
1396
1397 if (optlen!=sizeof(val))
1398 return -EINVAL;
1399 if (copy_from_user(&val,optval,sizeof(val)))
1400 return -EFAULT;
1401
1402 pkt_sk(sk)->copy_thresh = val;
1403 return 0;
1404 }
1405#endif
Herbert Xu8dc41942007-02-04 23:31:32 -08001406 case PACKET_AUXDATA:
1407 {
1408 int val;
1409
1410 if (optlen < sizeof(val))
1411 return -EINVAL;
1412 if (copy_from_user(&val, optval, sizeof(val)))
1413 return -EFAULT;
1414
1415 po->auxdata = !!val;
1416 return 0;
1417 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001418 default:
1419 return -ENOPROTOOPT;
1420 }
1421}
1422
1423static int packet_getsockopt(struct socket *sock, int level, int optname,
1424 char __user *optval, int __user *optlen)
1425{
1426 int len;
Herbert Xu8dc41942007-02-04 23:31:32 -08001427 int val;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001428 struct sock *sk = sock->sk;
1429 struct packet_sock *po = pkt_sk(sk);
Herbert Xu8dc41942007-02-04 23:31:32 -08001430 void *data;
1431 struct tpacket_stats st;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001432
1433 if (level != SOL_PACKET)
1434 return -ENOPROTOOPT;
1435
Kris Katterjohn8ae55f02006-01-23 16:28:02 -08001436 if (get_user(len, optlen))
1437 return -EFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001438
1439 if (len < 0)
1440 return -EINVAL;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001441
Linus Torvalds1da177e2005-04-16 15:20:36 -07001442 switch(optname) {
1443 case PACKET_STATISTICS:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001444 if (len > sizeof(struct tpacket_stats))
1445 len = sizeof(struct tpacket_stats);
1446 spin_lock_bh(&sk->sk_receive_queue.lock);
1447 st = po->stats;
1448 memset(&po->stats, 0, sizeof(st));
1449 spin_unlock_bh(&sk->sk_receive_queue.lock);
1450 st.tp_packets += st.tp_drops;
1451
Herbert Xu8dc41942007-02-04 23:31:32 -08001452 data = &st;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001453 break;
Herbert Xu8dc41942007-02-04 23:31:32 -08001454 case PACKET_AUXDATA:
1455 if (len > sizeof(int))
1456 len = sizeof(int);
1457 val = po->auxdata;
1458
1459 data = &val;
1460 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001461 default:
1462 return -ENOPROTOOPT;
1463 }
1464
Kris Katterjohn8ae55f02006-01-23 16:28:02 -08001465 if (put_user(len, optlen))
1466 return -EFAULT;
Herbert Xu8dc41942007-02-04 23:31:32 -08001467 if (copy_to_user(optval, data, len))
1468 return -EFAULT;
Kris Katterjohn8ae55f02006-01-23 16:28:02 -08001469 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001470}
1471
1472
1473static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
1474{
1475 struct sock *sk;
1476 struct hlist_node *node;
1477 struct net_device *dev = (struct net_device*)data;
1478
1479 read_lock(&packet_sklist_lock);
1480 sk_for_each(sk, node, &packet_sklist) {
1481 struct packet_sock *po = pkt_sk(sk);
1482
1483 switch (msg) {
1484 case NETDEV_UNREGISTER:
1485#ifdef CONFIG_PACKET_MULTICAST
1486 if (po->mclist)
1487 packet_dev_mclist(dev, po->mclist, -1);
1488 // fallthrough
1489#endif
1490 case NETDEV_DOWN:
1491 if (dev->ifindex == po->ifindex) {
1492 spin_lock(&po->bind_lock);
1493 if (po->running) {
1494 __dev_remove_pack(&po->prot_hook);
1495 __sock_put(sk);
1496 po->running = 0;
1497 sk->sk_err = ENETDOWN;
1498 if (!sock_flag(sk, SOCK_DEAD))
1499 sk->sk_error_report(sk);
1500 }
1501 if (msg == NETDEV_UNREGISTER) {
1502 po->ifindex = -1;
1503 po->prot_hook.dev = NULL;
1504 }
1505 spin_unlock(&po->bind_lock);
1506 }
1507 break;
1508 case NETDEV_UP:
1509 spin_lock(&po->bind_lock);
1510 if (dev->ifindex == po->ifindex && po->num &&
1511 !po->running) {
1512 dev_add_pack(&po->prot_hook);
1513 sock_hold(sk);
1514 po->running = 1;
1515 }
1516 spin_unlock(&po->bind_lock);
1517 break;
1518 }
1519 }
1520 read_unlock(&packet_sklist_lock);
1521 return NOTIFY_DONE;
1522}
1523
1524
1525static int packet_ioctl(struct socket *sock, unsigned int cmd,
1526 unsigned long arg)
1527{
1528 struct sock *sk = sock->sk;
1529
1530 switch(cmd) {
1531 case SIOCOUTQ:
1532 {
1533 int amount = atomic_read(&sk->sk_wmem_alloc);
1534 return put_user(amount, (int __user *)arg);
1535 }
1536 case SIOCINQ:
1537 {
1538 struct sk_buff *skb;
1539 int amount = 0;
1540
1541 spin_lock_bh(&sk->sk_receive_queue.lock);
1542 skb = skb_peek(&sk->sk_receive_queue);
1543 if (skb)
1544 amount = skb->len;
1545 spin_unlock_bh(&sk->sk_receive_queue.lock);
1546 return put_user(amount, (int __user *)arg);
1547 }
1548 case SIOCGSTAMP:
1549 return sock_get_timestamp(sk, (struct timeval __user *)arg);
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001550
Linus Torvalds1da177e2005-04-16 15:20:36 -07001551#ifdef CONFIG_INET
1552 case SIOCADDRT:
1553 case SIOCDELRT:
1554 case SIOCDARP:
1555 case SIOCGARP:
1556 case SIOCSARP:
1557 case SIOCGIFADDR:
1558 case SIOCSIFADDR:
1559 case SIOCGIFBRDADDR:
1560 case SIOCSIFBRDADDR:
1561 case SIOCGIFNETMASK:
1562 case SIOCSIFNETMASK:
1563 case SIOCGIFDSTADDR:
1564 case SIOCSIFDSTADDR:
1565 case SIOCSIFFLAGS:
1566 return inet_dgram_ops.ioctl(sock, cmd, arg);
1567#endif
1568
1569 default:
Christoph Hellwigb5e5fa52006-01-03 14:18:33 -08001570 return -ENOIOCTLCMD;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001571 }
1572 return 0;
1573}
1574
1575#ifndef CONFIG_PACKET_MMAP
1576#define packet_mmap sock_no_mmap
1577#define packet_poll datagram_poll
1578#else
1579
1580static unsigned int packet_poll(struct file * file, struct socket *sock,
1581 poll_table *wait)
1582{
1583 struct sock *sk = sock->sk;
1584 struct packet_sock *po = pkt_sk(sk);
1585 unsigned int mask = datagram_poll(file, sock, wait);
1586
1587 spin_lock_bh(&sk->sk_receive_queue.lock);
1588 if (po->pg_vec) {
1589 unsigned last = po->head ? po->head-1 : po->frame_max;
1590 struct tpacket_hdr *h;
1591
1592 h = (struct tpacket_hdr *)packet_lookup_frame(po, last);
1593
1594 if (h->tp_status)
1595 mask |= POLLIN | POLLRDNORM;
1596 }
1597 spin_unlock_bh(&sk->sk_receive_queue.lock);
1598 return mask;
1599}
1600
1601
1602/* Dirty? Well, I still did not learn better way to account
1603 * for user mmaps.
1604 */
1605
1606static void packet_mm_open(struct vm_area_struct *vma)
1607{
1608 struct file *file = vma->vm_file;
Eric Dumazetb69aee02005-09-06 14:42:45 -07001609 struct socket * sock = file->private_data;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001610 struct sock *sk = sock->sk;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001611
Linus Torvalds1da177e2005-04-16 15:20:36 -07001612 if (sk)
1613 atomic_inc(&pkt_sk(sk)->mapped);
1614}
1615
1616static void packet_mm_close(struct vm_area_struct *vma)
1617{
1618 struct file *file = vma->vm_file;
Eric Dumazetb69aee02005-09-06 14:42:45 -07001619 struct socket * sock = file->private_data;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001620 struct sock *sk = sock->sk;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001621
Linus Torvalds1da177e2005-04-16 15:20:36 -07001622 if (sk)
1623 atomic_dec(&pkt_sk(sk)->mapped);
1624}
1625
1626static struct vm_operations_struct packet_mmap_ops = {
1627 .open = packet_mm_open,
1628 .close =packet_mm_close,
1629};
1630
1631static inline struct page *pg_vec_endpage(char *one_pg_vec, unsigned int order)
1632{
1633 return virt_to_page(one_pg_vec + (PAGE_SIZE << order) - 1);
1634}
1635
/*
 *	Free a block vector: every non-NULL block of 2^order pages, then the
 *	vector itself.  NULL slots are skipped.
 */
static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
{
	int idx = 0;

	while (idx < len) {
		if (likely(pg_vec[idx]))
			free_pages((unsigned long) pg_vec[idx], order);
		idx++;
	}
	kfree(pg_vec);
}
1646
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001647static inline char *alloc_one_pg_vec_page(unsigned long order)
1648{
1649 return (char *) __get_free_pages(GFP_KERNEL | __GFP_COMP | __GFP_ZERO,
1650 order);
1651}
1652
1653static char **alloc_pg_vec(struct tpacket_req *req, int order)
1654{
1655 unsigned int block_nr = req->tp_block_nr;
1656 char **pg_vec;
1657 int i;
1658
1659 pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL);
1660 if (unlikely(!pg_vec))
1661 goto out;
1662
1663 for (i = 0; i < block_nr; i++) {
1664 pg_vec[i] = alloc_one_pg_vec_page(order);
1665 if (unlikely(!pg_vec[i]))
1666 goto out_free_pgvec;
1667 }
1668
1669out:
1670 return pg_vec;
1671
1672out_free_pgvec:
1673 free_pg_vec(pg_vec, order, block_nr);
1674 pg_vec = NULL;
1675 goto out;
1676}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001677
1678static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
1679{
1680 char **pg_vec = NULL;
1681 struct packet_sock *po = pkt_sk(sk);
Al Viro0e11c912006-11-08 00:26:29 -08001682 int was_running, order = 0;
1683 __be16 num;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001684 int err = 0;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001685
Linus Torvalds1da177e2005-04-16 15:20:36 -07001686 if (req->tp_block_nr) {
1687 int i, l;
1688
1689 /* Sanity tests and some calculations */
1690
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001691 if (unlikely(po->pg_vec))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001692 return -EBUSY;
1693
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001694 if (unlikely((int)req->tp_block_size <= 0))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001695 return -EINVAL;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001696 if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001697 return -EINVAL;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001698 if (unlikely(req->tp_frame_size < TPACKET_HDRLEN))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001699 return -EINVAL;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001700 if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001701 return -EINVAL;
1702
1703 po->frames_per_block = req->tp_block_size/req->tp_frame_size;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001704 if (unlikely(po->frames_per_block <= 0))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001705 return -EINVAL;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001706 if (unlikely((po->frames_per_block * req->tp_block_nr) !=
1707 req->tp_frame_nr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001708 return -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001709
1710 err = -ENOMEM;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001711 order = get_order(req->tp_block_size);
1712 pg_vec = alloc_pg_vec(req, order);
1713 if (unlikely(!pg_vec))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001714 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001715
1716 l = 0;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001717 for (i = 0; i < req->tp_block_nr; i++) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001718 char *ptr = pg_vec[i];
1719 struct tpacket_hdr *header;
1720 int k;
1721
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001722 for (k = 0; k < po->frames_per_block; k++) {
1723 header = (struct tpacket_hdr *) ptr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001724 header->tp_status = TP_STATUS_KERNEL;
1725 ptr += req->tp_frame_size;
1726 }
1727 }
1728 /* Done */
1729 } else {
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001730 if (unlikely(req->tp_frame_nr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001731 return -EINVAL;
1732 }
1733
1734 lock_sock(sk);
1735
1736 /* Detach socket from network */
1737 spin_lock(&po->bind_lock);
1738 was_running = po->running;
1739 num = po->num;
1740 if (was_running) {
1741 __dev_remove_pack(&po->prot_hook);
1742 po->num = 0;
1743 po->running = 0;
1744 __sock_put(sk);
1745 }
1746 spin_unlock(&po->bind_lock);
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001747
Linus Torvalds1da177e2005-04-16 15:20:36 -07001748 synchronize_net();
1749
1750 err = -EBUSY;
1751 if (closing || atomic_read(&po->mapped) == 0) {
1752 err = 0;
1753#define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
1754
1755 spin_lock_bh(&sk->sk_receive_queue.lock);
1756 pg_vec = XC(po->pg_vec, pg_vec);
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001757 po->frame_max = (req->tp_frame_nr - 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001758 po->head = 0;
1759 po->frame_size = req->tp_frame_size;
1760 spin_unlock_bh(&sk->sk_receive_queue.lock);
1761
1762 order = XC(po->pg_vec_order, order);
1763 req->tp_block_nr = XC(po->pg_vec_len, req->tp_block_nr);
1764
1765 po->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
1766 po->prot_hook.func = po->pg_vec ? tpacket_rcv : packet_rcv;
1767 skb_queue_purge(&sk->sk_receive_queue);
1768#undef XC
1769 if (atomic_read(&po->mapped))
1770 printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped));
1771 }
1772
1773 spin_lock(&po->bind_lock);
1774 if (was_running && !po->running) {
1775 sock_hold(sk);
1776 po->running = 1;
1777 po->num = num;
1778 dev_add_pack(&po->prot_hook);
1779 }
1780 spin_unlock(&po->bind_lock);
1781
1782 release_sock(sk);
1783
Linus Torvalds1da177e2005-04-16 15:20:36 -07001784 if (pg_vec)
1785 free_pg_vec(pg_vec, order, req->tp_block_nr);
1786out:
1787 return err;
1788}
1789
1790static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
1791{
1792 struct sock *sk = sock->sk;
1793 struct packet_sock *po = pkt_sk(sk);
1794 unsigned long size;
1795 unsigned long start;
1796 int err = -EINVAL;
1797 int i;
1798
1799 if (vma->vm_pgoff)
1800 return -EINVAL;
1801
1802 size = vma->vm_end - vma->vm_start;
1803
1804 lock_sock(sk);
1805 if (po->pg_vec == NULL)
1806 goto out;
1807 if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE)
1808 goto out;
1809
Linus Torvalds1da177e2005-04-16 15:20:36 -07001810 start = vma->vm_start;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001811 for (i = 0; i < po->pg_vec_len; i++) {
1812 struct page *page = virt_to_page(po->pg_vec[i]);
1813 int pg_num;
1814
1815 for (pg_num = 0; pg_num < po->pg_vec_pages; pg_num++, page++) {
1816 err = vm_insert_page(vma, start, page);
1817 if (unlikely(err))
1818 goto out;
1819 start += PAGE_SIZE;
1820 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001821 }
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001822 atomic_inc(&po->mapped);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001823 vma->vm_ops = &packet_mmap_ops;
1824 err = 0;
1825
1826out:
1827 release_sock(sk);
1828 return err;
1829}
1830#endif
1831
1832
1833#ifdef CONFIG_SOCK_PACKET
Eric Dumazet90ddc4f2005-12-22 12:49:22 -08001834static const struct proto_ops packet_ops_spkt = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001835 .family = PF_PACKET,
1836 .owner = THIS_MODULE,
1837 .release = packet_release,
1838 .bind = packet_bind_spkt,
1839 .connect = sock_no_connect,
1840 .socketpair = sock_no_socketpair,
1841 .accept = sock_no_accept,
1842 .getname = packet_getname_spkt,
1843 .poll = datagram_poll,
1844 .ioctl = packet_ioctl,
1845 .listen = sock_no_listen,
1846 .shutdown = sock_no_shutdown,
1847 .setsockopt = sock_no_setsockopt,
1848 .getsockopt = sock_no_getsockopt,
1849 .sendmsg = packet_sendmsg_spkt,
1850 .recvmsg = packet_recvmsg,
1851 .mmap = sock_no_mmap,
1852 .sendpage = sock_no_sendpage,
1853};
1854#endif
1855
Eric Dumazet90ddc4f2005-12-22 12:49:22 -08001856static const struct proto_ops packet_ops = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001857 .family = PF_PACKET,
1858 .owner = THIS_MODULE,
1859 .release = packet_release,
1860 .bind = packet_bind,
1861 .connect = sock_no_connect,
1862 .socketpair = sock_no_socketpair,
1863 .accept = sock_no_accept,
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001864 .getname = packet_getname,
Linus Torvalds1da177e2005-04-16 15:20:36 -07001865 .poll = packet_poll,
1866 .ioctl = packet_ioctl,
1867 .listen = sock_no_listen,
1868 .shutdown = sock_no_shutdown,
1869 .setsockopt = packet_setsockopt,
1870 .getsockopt = packet_getsockopt,
1871 .sendmsg = packet_sendmsg,
1872 .recvmsg = packet_recvmsg,
1873 .mmap = packet_mmap,
1874 .sendpage = sock_no_sendpage,
1875};
1876
1877static struct net_proto_family packet_family_ops = {
1878 .family = PF_PACKET,
1879 .create = packet_create,
1880 .owner = THIS_MODULE,
1881};
1882
1883static struct notifier_block packet_netdev_notifier = {
1884 .notifier_call =packet_notifier,
1885};
1886
1887#ifdef CONFIG_PROC_FS
1888static inline struct sock *packet_seq_idx(loff_t off)
1889{
1890 struct sock *s;
1891 struct hlist_node *node;
1892
1893 sk_for_each(s, node, &packet_sklist) {
1894 if (!off--)
1895 return s;
1896 }
1897 return NULL;
1898}
1899
1900static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
1901{
1902 read_lock(&packet_sklist_lock);
1903 return *pos ? packet_seq_idx(*pos - 1) : SEQ_START_TOKEN;
1904}
1905
1906static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1907{
1908 ++*pos;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001909 return (v == SEQ_START_TOKEN)
1910 ? sk_head(&packet_sklist)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001911 : sk_next((struct sock*)v) ;
1912}
1913
1914static void packet_seq_stop(struct seq_file *seq, void *v)
1915{
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001916 read_unlock(&packet_sklist_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001917}
1918
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001919static int packet_seq_show(struct seq_file *seq, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001920{
1921 if (v == SEQ_START_TOKEN)
1922 seq_puts(seq, "sk RefCnt Type Proto Iface R Rmem User Inode\n");
1923 else {
1924 struct sock *s = v;
1925 const struct packet_sock *po = pkt_sk(s);
1926
1927 seq_printf(seq,
1928 "%p %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n",
1929 s,
1930 atomic_read(&s->sk_refcnt),
1931 s->sk_type,
1932 ntohs(po->num),
1933 po->ifindex,
1934 po->running,
1935 atomic_read(&s->sk_rmem_alloc),
1936 sock_i_uid(s),
1937 sock_i_ino(s) );
1938 }
1939
1940 return 0;
1941}
1942
1943static struct seq_operations packet_seq_ops = {
1944 .start = packet_seq_start,
1945 .next = packet_seq_next,
1946 .stop = packet_seq_stop,
1947 .show = packet_seq_show,
1948};
1949
1950static int packet_seq_open(struct inode *inode, struct file *file)
1951{
1952 return seq_open(file, &packet_seq_ops);
1953}
1954
1955static struct file_operations packet_seq_fops = {
1956 .owner = THIS_MODULE,
1957 .open = packet_seq_open,
1958 .read = seq_read,
1959 .llseek = seq_lseek,
1960 .release = seq_release,
1961};
1962
1963#endif
1964
1965static void __exit packet_exit(void)
1966{
1967 proc_net_remove("packet");
1968 unregister_netdevice_notifier(&packet_netdev_notifier);
1969 sock_unregister(PF_PACKET);
1970 proto_unregister(&packet_proto);
1971}
1972
1973static int __init packet_init(void)
1974{
1975 int rc = proto_register(&packet_proto, 0);
1976
1977 if (rc != 0)
1978 goto out;
1979
1980 sock_register(&packet_family_ops);
1981 register_netdevice_notifier(&packet_netdev_notifier);
1982 proc_net_fops_create("packet", 0, &packet_seq_fops);
1983out:
1984 return rc;
1985}
1986
1987module_init(packet_init);
1988module_exit(packet_exit);
1989MODULE_LICENSE("GPL");
1990MODULE_ALIAS_NETPROTO(PF_PACKET);