blob: bf2699074774dc436d53d5276c2375721df58cf1 [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * PACKET - implements raw packet sockets.
7 *
8 * Version: $Id: af_packet.c,v 1.61 2002/02/08 03:57:19 davem Exp $
9 *
Jesper Juhl02c30a82005-05-05 16:16:16 -070010 * Authors: Ross Biro
Linus Torvalds1da177e2005-04-16 15:20:36 -070011 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Alan Cox, <gw4pts@gw4pts.ampr.org>
13 *
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090014 * Fixes:
Linus Torvalds1da177e2005-04-16 15:20:36 -070015 * Alan Cox : verify_area() now used correctly
16 * Alan Cox : new skbuff lists, look ma no backlogs!
17 * Alan Cox : tidied skbuff lists.
18 * Alan Cox : Now uses generic datagram routines I
19 * added. Also fixed the peek/read crash
20 * from all old Linux datagram code.
21 * Alan Cox : Uses the improved datagram code.
22 * Alan Cox : Added NULL's for socket options.
23 * Alan Cox : Re-commented the code.
24 * Alan Cox : Use new kernel side addressing
25 * Rob Janssen : Correct MTU usage.
26 * Dave Platt : Counter leaks caused by incorrect
27 * interrupt locking and some slightly
28 * dubious gcc output. Can you read
29 * compiler: it said _VOLATILE_
30 * Richard Kooijman : Timestamp fixes.
31 * Alan Cox : New buffers. Use sk->mac.raw.
32 * Alan Cox : sendmsg/recvmsg support.
33 * Alan Cox : Protocol setting support
34 * Alexey Kuznetsov : Untied from IPv4 stack.
35 * Cyrus Durgin : Fixed kerneld for kmod.
36 * Michal Ostrowski : Module initialization cleanup.
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090037 * Ulises Alonso : Frame number limit removal and
Linus Torvalds1da177e2005-04-16 15:20:36 -070038 * packet_set_ring memory leak.
Eric W. Biederman0fb375f2005-09-21 00:11:37 -070039 * Eric Biederman : Allow for > 8 byte hardware addresses.
40 * The convention is that longer addresses
41 * will simply extend the hardware address
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090042 * byte arrays at the end of sockaddr_ll
Eric W. Biederman0fb375f2005-09-21 00:11:37 -070043 * and packet_mreq.
Linus Torvalds1da177e2005-04-16 15:20:36 -070044 *
45 * This program is free software; you can redistribute it and/or
46 * modify it under the terms of the GNU General Public License
47 * as published by the Free Software Foundation; either version
48 * 2 of the License, or (at your option) any later version.
49 *
50 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090051
Linus Torvalds1da177e2005-04-16 15:20:36 -070052#include <linux/types.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070053#include <linux/mm.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080054#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070055#include <linux/fcntl.h>
56#include <linux/socket.h>
57#include <linux/in.h>
58#include <linux/inet.h>
59#include <linux/netdevice.h>
60#include <linux/if_packet.h>
61#include <linux/wireless.h>
Herbert Xuffbc6112007-02-04 23:33:10 -080062#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070063#include <linux/kmod.h>
64#include <net/ip.h>
65#include <net/protocol.h>
66#include <linux/skbuff.h>
67#include <net/sock.h>
68#include <linux/errno.h>
69#include <linux/timer.h>
70#include <asm/system.h>
71#include <asm/uaccess.h>
72#include <asm/ioctls.h>
73#include <asm/page.h>
Al Viroa1f8e7f2006-10-19 16:08:53 -040074#include <asm/cacheflush.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070075#include <asm/io.h>
76#include <linux/proc_fs.h>
77#include <linux/seq_file.h>
78#include <linux/poll.h>
79#include <linux/module.h>
80#include <linux/init.h>
81
82#ifdef CONFIG_INET
83#include <net/inet_common.h>
84#endif
85
86#define CONFIG_SOCK_PACKET 1
87
88/*
89 Proposed replacement for SIOC{ADD,DEL}MULTI and
90 IFF_PROMISC, IFF_ALLMULTI flags.
91
   It is more expensive, but I believe,
   it is really a correct solution: reenterable, safe and fault tolerant.
94
95 IFF_PROMISC/IFF_ALLMULTI/SIOC{ADD/DEL}MULTI are faked by keeping
96 reference count and global flag, so that real status is
97 (gflag|(count != 0)), so that we can use obsolete faulty interface
98 not harming clever users.
99 */
100#define CONFIG_PACKET_MULTICAST 1
101
102/*
103 Assumptions:
104 - if device has no dev->hard_header routine, it adds and removes ll header
105 inside itself. In this case ll header is invisible outside of device,
106 but higher levels still should reserve dev->hard_header_len.
107 Some devices are enough clever to reallocate skb, when header
108 will not fit to reserved space (tunnel), another ones are silly
109 (PPP).
110 - packet socket receives packets with pulled ll header,
111 so that SOCK_RAW should push it back.
112
113On receive:
114-----------
115
116Incoming, dev->hard_header!=NULL
117 mac.raw -> ll header
118 data -> data
119
120Outgoing, dev->hard_header!=NULL
121 mac.raw -> ll header
122 data -> ll header
123
124Incoming, dev->hard_header==NULL
125 mac.raw -> UNKNOWN position. It is very likely, that it points to ll header.
   PPP does this, which is wrong, because it introduces an asymmetry
   between the rx and tx paths.
128 data -> data
129
130Outgoing, dev->hard_header==NULL
131 mac.raw -> data. ll header is still not built!
132 data -> data
133
134Resume
135 If dev->hard_header==NULL we are unlikely to restore sensible ll header.
136
137
138On transmit:
139------------
140
141dev->hard_header != NULL
142 mac.raw -> ll header
143 data -> ll header
144
145dev->hard_header == NULL (ll header is added by device, we cannot control it)
146 mac.raw -> data
147 data -> data
148
   We should set nh.raw on output to the correct position,
   the packet classifier depends on it.
151 */
152
153/* List of all packet sockets. */
154static HLIST_HEAD(packet_sklist);
155static DEFINE_RWLOCK(packet_sklist_lock);
156
157static atomic_t packet_socks_nr;
158
159
160/* Private packet socket structures. */
161
162#ifdef CONFIG_PACKET_MULTICAST
/* One multicast/promisc membership taken on behalf of this socket
 * (PACKET_ADD_MEMBERSHIP).  Kept on a singly linked list in
 * packet_sock->mclist so it can be undone at close time. */
struct packet_mclist
{
	struct packet_mclist	*next;
	int			ifindex;	/* device the membership applies to */
	int			count;		/* times this entry was added (refcount) */
	unsigned short		type;		/* PACKET_MR_* kind */
	unsigned short		alen;		/* valid length of addr[] */
	unsigned char		addr[MAX_ADDR_LEN];
};
172/* identical to struct packet_mreq except it has
173 * a longer address field.
174 */
/* identical to struct packet_mreq except it has
 * a longer address field (MAX_ADDR_LEN instead of 8 bytes),
 * so setsockopt() can accept both the old and the extended layout. */
struct packet_mreq_max
{
	int		mr_ifindex;
	unsigned short	mr_type;
	unsigned short	mr_alen;
	unsigned char	mr_address[MAX_ADDR_LEN];
};
182#endif
183#ifdef CONFIG_PACKET_MMAP
184static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing);
185#endif
186
187static void packet_flush_mclist(struct sock *sk);
188
/* Per-socket state for AF_PACKET.  Obtained from a struct sock via
 * pkt_sk(); that cast is what the "first member" rule below enables. */
struct packet_sock {
	/* struct sock has to be the first member of packet_sock */
	struct sock		sk;
	struct tpacket_stats	stats;		/* tp_packets / tp_drops counters */
#ifdef CONFIG_PACKET_MMAP
	char *			*pg_vec;	/* blocks backing the rx ring */
	unsigned int		head;		/* next frame slot to fill */
	unsigned int		frames_per_block;
	unsigned int		frame_size;
	unsigned int		frame_max;	/* highest valid frame index */
	int			copy_thresh;
#endif
	struct packet_type	prot_hook;	/* our entry in the ptype chain */
	spinlock_t		bind_lock;	/* protects running/num/prot_hook */
	unsigned int		running:1,	/* prot_hook is attached*/
				auxdata:1;	/* PACKET_AUXDATA enabled */
	int			ifindex;	/* bound device		*/
	__be16			num;		/* bound protocol (network order) */
#ifdef CONFIG_PACKET_MULTICAST
	struct packet_mclist	*mclist;	/* memberships to undo at close */
#endif
#ifdef CONFIG_PACKET_MMAP
	atomic_t		mapped;		/* ring currently mmap()ed count */
	unsigned int		pg_vec_order;	/* page order of each block */
	unsigned int		pg_vec_pages;	/* pages per block */
	unsigned int		pg_vec_len;	/* number of blocks */
#endif
};
217
/* Layout of the per-skb control block (skb->cb) while an skb sits on a
 * packet socket's receive queue: the pre-trim length plus the address
 * that recvmsg() will hand back in msg_name. */
struct packet_skb_cb {
	unsigned int origlen;		/* skb->len before pskb_trim() */
	union {
		struct sockaddr_pkt pkt;	/* SOCK_PACKET sockets */
		struct sockaddr_ll ll;		/* AF_PACKET sockets */
	} sa;
};

/* Accessor for the control block; cb is just raw storage. */
#define PACKET_SKB_CB(__skb)	((struct packet_skb_cb *)((__skb)->cb))
Herbert Xu8dc41942007-02-04 23:31:32 -0800227
Linus Torvalds1da177e2005-04-16 15:20:36 -0700228#ifdef CONFIG_PACKET_MMAP
229
230static inline char *packet_lookup_frame(struct packet_sock *po, unsigned int position)
231{
232 unsigned int pg_vec_pos, frame_offset;
233 char *frame;
234
235 pg_vec_pos = position / po->frames_per_block;
236 frame_offset = position % po->frames_per_block;
237
238 frame = po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size);
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900239
Linus Torvalds1da177e2005-04-16 15:20:36 -0700240 return frame;
241}
242#endif
243
/* Upcast from the generic sock to our packet_sock.  Valid because
 * struct sock is the first member of struct packet_sock (see above). */
static inline struct packet_sock *pkt_sk(struct sock *sk)
{
	return (struct packet_sock *)sk;
}
248
249static void packet_sock_destruct(struct sock *sk)
250{
251 BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
252 BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
253
254 if (!sock_flag(sk, SOCK_DEAD)) {
255 printk("Attempt to release alive packet socket: %p\n", sk);
256 return;
257 }
258
259 atomic_dec(&packet_socks_nr);
260#ifdef PACKET_REFCNT_DEBUG
261 printk(KERN_DEBUG "PACKET socket %p is free, %d are alive\n", sk, atomic_read(&packet_socks_nr));
262#endif
263}
264
265
Eric Dumazet90ddc4f2005-12-22 12:49:22 -0800266static const struct proto_ops packet_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700267
268#ifdef CONFIG_SOCK_PACKET
Eric Dumazet90ddc4f2005-12-22 12:49:22 -0800269static const struct proto_ops packet_ops_spkt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700270
/* Receive hook for SOCK_PACKET sockets.  Restores the link-layer
 * header, records the originating device in skb->cb and queues the skb
 * on the socket.  Always returns 0; the skb is consumed on every path. */
static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_pkt *spkt;

	/*
	 *	When we registered the protocol we saved the socket in the data
	 *	field for just this event.
	 */

	sk = pt->af_packet_priv;

	/*
	 *	Yank back the headers [hope the device set this
	 *	right or kerboom...]
	 *
	 *	Incoming packets have ll header pulled,
	 *	push it back.
	 *
	 *	For outgoing ones skb->data == skb->mac.raw
	 *	so that this procedure is noop.
	 */

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto out;

	/* We may mangle the skb below; unshare it first. */
	if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
		goto oom;

	/* drop any routing info */
	dst_release(skb->dst);
	skb->dst = NULL;

	/* drop conntrack reference */
	nf_reset(skb);

	spkt = &PACKET_SKB_CB(skb)->sa.pkt;

	/* Push the link-layer header back in front of skb->data. */
	skb_push(skb, skb->data-skb->mac.raw);

	/*
	 *	The SOCK_PACKET socket receives _all_ frames.
	 */

	spkt->spkt_family = dev->type;
	strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
	spkt->spkt_protocol = skb->protocol;

	/*
	 *	Charge the memory to the socket. This is done specifically
	 *	to prevent sockets using all the memory up.
	 */

	if (sock_queue_rcv_skb(sk,skb) == 0)
		return 0;

out:
	kfree_skb(skb);
oom:
	return 0;
}
332
333
334/*
335 * Output a raw packet to a device layer. This bypasses all the other
336 * protocol layers and you must therefore supply it with a complete frame
337 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900338
/* SOCK_PACKET transmit path: copy one complete, caller-built frame from
 * user space and queue it directly on the named device.  No protocol
 * processing or fragmentation is done here.  Returns the number of
 * bytes sent or a negative errno. */
static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
			       struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_pkt *saddr=(struct sockaddr_pkt *)msg->msg_name;
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto=0;
	int err;

	/*
	 *	Get and verify the address.
	 */

	if (saddr)
	{
		if (msg->msg_namelen < sizeof(struct sockaddr))
			return(-EINVAL);
		if (msg->msg_namelen==sizeof(struct sockaddr_pkt))
			proto=saddr->spkt_protocol;
	}
	else
		return(-ENOTCONN);	/* SOCK_PACKET must be sent giving an address */

	/*
	 *	Find the device first to size check it
	 */

	/* Force NUL termination; 13 is presumably the last byte of
	 * spkt_device[] — TODO confirm against struct sockaddr_pkt. */
	saddr->spkt_device[13] = 0;
	dev = dev_get_by_name(saddr->spkt_device);
	err = -ENODEV;
	if (dev == NULL)
		goto out_unlock;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	/*
	 * You may not queue a frame bigger than the mtu. This is the lowest level
	 * raw protocol and you must do your own fragmentation at this level.
	 */

	err = -EMSGSIZE;
	if (len > dev->mtu + dev->hard_header_len)
		goto out_unlock;

	err = -ENOBUFS;
	skb = sock_wmalloc(sk, len + LL_RESERVED_SPACE(dev), 0, GFP_KERNEL);

	/*
	 * If the write buffer is full, then tough. At this level the user gets to
	 * deal with the problem - do your own algorithmic backoffs. That's far
	 * more flexible.
	 */

	if (skb == NULL)
		goto out_unlock;

	/*
	 *	Fill it in
	 */

	/* FIXME: Save some space for broken drivers that write a
	 * hard header at transmission time by themselves. PPP is the
	 * notable one here. This should really be fixed at the driver level.
	 */
	skb_reserve(skb, LL_RESERVED_SPACE(dev));
	skb->nh.raw = skb->data;

	/* Try to align data part correctly: the user supplied the whole
	 * frame, so back up over the reserved hard-header area. */
	if (dev->hard_header) {
		skb->data -= dev->hard_header_len;
		skb->tail -= dev->hard_header_len;
		if (len < dev->hard_header_len)
			skb->nh.raw = skb->data;
	}

	/* Returns -EFAULT on error */
	err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;
	if (err)
		goto out_free;

	/*
	 *	Now send it
	 */

	dev_queue_xmit(skb);
	dev_put(dev);
	return(len);

out_free:
	kfree_skb(skb);
out_unlock:
	if (dev)
		dev_put(dev);
	return err;
}
440#endif
441
David S. Millerdbcb5852007-01-24 15:21:02 -0800442static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
443 unsigned int res)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700444{
445 struct sk_filter *filter;
446
Dmitry Mishinfda9ef52006-08-31 15:28:39 -0700447 rcu_read_lock_bh();
448 filter = rcu_dereference(sk->sk_filter);
David S. Millerdbcb5852007-01-24 15:21:02 -0800449 if (filter != NULL)
450 res = sk_run_filter(skb, filter->insns, filter->len);
Dmitry Mishinfda9ef52006-08-31 15:28:39 -0700451 rcu_read_unlock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700452
David S. Millerdbcb5852007-01-24 15:21:02 -0800453 return res;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700454}
455
456/*
457 This function makes lazy skb cloning in hope that most of packets
458 are discarded by BPF.
459
460 Note tricky part: we DO mangle shared skb! skb->data, skb->len
461 and skb->cb are mangled. It works because (and until) packets
462 falling here are owned by current CPU. Output packets are cloned
463 by dev_queue_xmit_nit(), input packets are processed by net_bh
464 sequencially, so that if we return skb to original state on exit,
465 we will not harm anyone.
466 */
467
/* Receive hook for plain (non-mmap) AF_PACKET sockets.  Runs the socket
 * filter, fills a sockaddr_ll into skb->cb, trims to the snap length and
 * queues the skb.  Implements the lazy-clone scheme described above: a
 * shared skb is mangled in place and restored on the drop path.
 * Always returns 0. */
static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct sockaddr_ll *sll;
	struct packet_sock *po;
	u8 * skb_head = skb->data;	/* saved so a shared skb can be restored */
	int skb_len = skb->len;
	unsigned int snaplen, res;

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	skb->dev = dev;

	if (dev->hard_header) {
		/* The device has an explicit notion of ll header,
		   exported to higher levels.

		   Otherwise, the device hides details of its frame
		   structure, so that the corresponding packet head is
		   never delivered to the user.
		 */
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb->mac.raw);
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb->nh.raw - skb->data);
		}
	}

	snaplen = skb->len;

	res = run_filter(skb, sk, snaplen);
	if (!res)
		goto drop_n_restore;	/* filter rejected the packet */
	if (snaplen > res)
		snaplen = res;		/* filter asked for a shorter snapshot */

	if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
	    (unsigned)sk->sk_rcvbuf)
		goto drop_n_acct;

	/* From here on we will mangle the skb for real; take a private
	 * clone if it is shared, restoring the original first. */
	if (skb_shared(skb)) {
		struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
		if (nskb == NULL)
			goto drop_n_acct;

		if (skb_head != skb->data) {
			skb->data = skb_head;
			skb->len = skb_len;
		}
		kfree_skb(skb);
		skb = nskb;
	}

	/* Our control-block layout must fit into skb->cb. */
	BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
		     sizeof(skb->cb));

	sll = &PACKET_SKB_CB(skb)->sa.ll;
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	sll->sll_ifindex = dev->ifindex;
	sll->sll_halen = 0;

	if (dev->hard_header_parse)
		sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);

	PACKET_SKB_CB(skb)->origlen = skb->len;	/* length before trimming */

	if (pskb_trim(skb, snaplen))
		goto drop_n_acct;

	skb_set_owner_r(skb, sk);
	skb->dev = NULL;
	dst_release(skb->dst);
	skb->dst = NULL;

	/* drop conntrack reference */
	nf_reset(skb);

	spin_lock(&sk->sk_receive_queue.lock);
	po->stats.tp_packets++;
	__skb_queue_tail(&sk->sk_receive_queue, skb);
	spin_unlock(&sk->sk_receive_queue.lock);
	sk->sk_data_ready(sk, skb->len);
	return 0;

drop_n_acct:
	/* drop counted in the socket statistics */
	spin_lock(&sk->sk_receive_queue.lock);
	po->stats.tp_drops++;
	spin_unlock(&sk->sk_receive_queue.lock);

drop_n_restore:
	/* undo our mangling before giving a shared skb back */
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	kfree_skb(skb);
	return 0;
}
574
575#ifdef CONFIG_PACKET_MMAP
/* Receive hook for mmap (PACKET_RX_RING) sockets.  Copies the packet
 * into the next free frame of the shared ring instead of queueing the
 * skb itself; the skb is only queued when the frame is too small and
 * copy_thresh allows keeping a copy.  Always returns 0. */
static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
{
	struct sock *sk;
	struct packet_sock *po;
	struct sockaddr_ll *sll;
	struct tpacket_hdr *h;
	u8 * skb_head = skb->data;	/* saved so a shared skb can be restored */
	int skb_len = skb->len;
	unsigned int snaplen, res;
	unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
	unsigned short macoff, netoff;	/* mac/net header offsets inside the frame */
	struct sk_buff *copy_skb = NULL;

	if (skb->pkt_type == PACKET_LOOPBACK)
		goto drop;

	sk = pt->af_packet_priv;
	po = pkt_sk(sk);

	if (dev->hard_header) {
		if (sk->sk_type != SOCK_DGRAM)
			skb_push(skb, skb->data - skb->mac.raw);
		else if (skb->pkt_type == PACKET_OUTGOING) {
			/* Special case: outgoing packets have ll header at head */
			skb_pull(skb, skb->nh.raw - skb->data);
		}
	}

	/* Tell userspace the checksum is not yet computed. */
	if (skb->ip_summed == CHECKSUM_PARTIAL)
		status |= TP_STATUS_CSUMNOTREADY;

	snaplen = skb->len;

	res = run_filter(skb, sk, snaplen);
	if (!res)
		goto drop_n_restore;
	if (snaplen > res)
		snaplen = res;

	/* Compute where the copied data will sit inside the ring frame. */
	if (sk->sk_type == SOCK_DGRAM) {
		macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
	} else {
		unsigned maclen = skb->nh.raw - skb->data;
		netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen));
		macoff = netoff - maclen;
	}

	if (macoff + snaplen > po->frame_size) {
		/* Packet does not fit the frame: optionally keep a full
		 * copy on the receive queue, then truncate. */
		if (po->copy_thresh &&
		    atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
		    (unsigned)sk->sk_rcvbuf) {
			if (skb_shared(skb)) {
				copy_skb = skb_clone(skb, GFP_ATOMIC);
			} else {
				copy_skb = skb_get(skb);
				skb_head = skb->data;
			}
			if (copy_skb)
				skb_set_owner_r(copy_skb, sk);
		}
		snaplen = po->frame_size - macoff;
		if ((int)snaplen < 0)
			snaplen = 0;
	}

	spin_lock(&sk->sk_receive_queue.lock);
	h = (struct tpacket_hdr *)packet_lookup_frame(po, po->head);

	/* Frame still owned by userspace -> ring is full. */
	if (h->tp_status)
		goto ring_is_full;
	po->head = po->head != po->frame_max ? po->head+1 : 0;
	po->stats.tp_packets++;
	if (copy_skb) {
		status |= TP_STATUS_COPY;
		__skb_queue_tail(&sk->sk_receive_queue, copy_skb);
	}
	if (!po->stats.tp_drops)
		status &= ~TP_STATUS_LOSING;
	spin_unlock(&sk->sk_receive_queue.lock);

	skb_copy_bits(skb, 0, (u8*)h + macoff, snaplen);

	h->tp_len = skb->len;
	h->tp_snaplen = snaplen;
	h->tp_mac = macoff;
	h->tp_net = netoff;
	if (skb->tstamp.off_sec == 0) {
		/* No timestamp yet: stamp now and keep stamping future packets. */
		__net_timestamp(skb);
		sock_enable_timestamp(sk);
	}
	h->tp_sec = skb->tstamp.off_sec;
	h->tp_usec = skb->tstamp.off_usec;

	sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
	sll->sll_halen = 0;
	if (dev->hard_header_parse)
		sll->sll_halen = dev->hard_header_parse(skb, sll->sll_addr);
	sll->sll_family = AF_PACKET;
	sll->sll_hatype = dev->type;
	sll->sll_protocol = skb->protocol;
	sll->sll_pkttype = skb->pkt_type;
	sll->sll_ifindex = dev->ifindex;

	/* Publish the frame: tp_status hands ownership to userspace, so
	 * the barrier must keep all stores above before this one. */
	h->tp_status = status;
	smp_mb();

	{
		/* Flush the data cache over the written frame for
		 * userspace mappings on non-coherent architectures. */
		struct page *p_start, *p_end;
		u8 *h_end = (u8 *)h + macoff + snaplen - 1;

		p_start = virt_to_page(h);
		p_end = virt_to_page(h_end);
		while (p_start <= p_end) {
			flush_dcache_page(p_start);
			p_start++;
		}
	}

	sk->sk_data_ready(sk, 0);

drop_n_restore:
	if (skb_head != skb->data && skb_shared(skb)) {
		skb->data = skb_head;
		skb->len = skb_len;
	}
drop:
	kfree_skb(skb);
	return 0;

ring_is_full:
	po->stats.tp_drops++;
	spin_unlock(&sk->sk_receive_queue.lock);

	sk->sk_data_ready(sk, 0);
	if (copy_skb)
		kfree_skb(copy_skb);
	goto drop_n_restore;
}
714
715#endif
716
717
/* AF_PACKET transmit path (SOCK_RAW / SOCK_DGRAM).  Resolves the target
 * device from the supplied sockaddr_ll or the socket's binding, builds
 * the link-layer header for SOCK_DGRAM, copies the payload from user
 * space and hands the frame to dev_queue_xmit().  Returns bytes sent
 * or a negative errno. */
static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
			  struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_ll *saddr=(struct sockaddr_ll *)msg->msg_name;
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto;
	unsigned char *addr;
	int ifindex, err, reserve = 0;

	/*
	 *	Get and verify the address.
	 */

	if (saddr == NULL) {
		/* No address given: use the socket's binding. */
		struct packet_sock *po = pkt_sk(sk);

		ifindex	= po->ifindex;
		proto	= po->num;
		addr	= NULL;
	} else {
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		/* The variable-length hardware address must fit in what
		 * the caller actually passed. */
		if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
			goto out;
		ifindex	= saddr->sll_ifindex;
		proto	= saddr->sll_protocol;
		addr	= saddr->sll_addr;
	}


	dev = dev_get_by_index(ifindex);
	err = -ENXIO;
	if (dev == NULL)
		goto out_unlock;
	if (sock->type == SOCK_RAW)
		reserve = dev->hard_header_len;	/* caller supplies the ll header */

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	err = -EMSGSIZE;
	if (len > dev->mtu+reserve)
		goto out_unlock;

	skb = sock_alloc_send_skb(sk, len + LL_RESERVED_SPACE(dev),
				msg->msg_flags & MSG_DONTWAIT, &err);
	if (skb==NULL)
		goto out_unlock;

	skb_reserve(skb, LL_RESERVED_SPACE(dev));
	skb->nh.raw = skb->data;

	if (dev->hard_header) {
		int res;
		err = -EINVAL;
		res = dev->hard_header(skb, dev, ntohs(proto), addr, NULL, len);
		if (sock->type != SOCK_DGRAM) {
			/* SOCK_RAW: discard the generated header, the
			 * user's data contains the full frame. */
			skb->tail = skb->data;
			skb->len = 0;
		} else if (res < 0)
			goto out_free;
	}

	/* Returns -EFAULT on error */
	err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
	if (err)
		goto out_free;

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;

	/*
	 *	Now send it
	 */

	err = dev_queue_xmit(skb);
	if (err > 0 && (err = net_xmit_errno(err)) != 0)
		goto out_unlock;

	dev_put(dev);

	return(len);

out_free:
	kfree_skb(skb);
out_unlock:
	if (dev)
		dev_put(dev);
out:
	return err;
}
814
815/*
816 * Close a PACKET socket. This is fairly simple. We immediately go
817 * to 'closed' state and remove our protocol entry in the device list.
818 */
819
/*
 *	Close a PACKET socket. This is fairly simple. We immediately go
 *	to 'closed' state and remove our protocol entry in the device list.
 *	Tears down the protocol hook, multicast memberships and (if set
 *	up) the mmap ring, then drops the socket's references.
 */
static int packet_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po;

	if (!sk)
		return 0;

	po = pkt_sk(sk);

	/* Remove from the global list of packet sockets. */
	write_lock_bh(&packet_sklist_lock);
	sk_del_node_init(sk);
	write_unlock_bh(&packet_sklist_lock);

	/*
	 *	Unhook packet receive handler.
	 */

	if (po->running) {
		/*
		 *	Remove the protocol hook and drop the reference
		 *	it held on the socket.
		 */
		dev_remove_pack(&po->prot_hook);
		po->running = 0;
		po->num = 0;
		__sock_put(sk);
	}

#ifdef CONFIG_PACKET_MULTICAST
	packet_flush_mclist(sk);
#endif

#ifdef CONFIG_PACKET_MMAP
	if (po->pg_vec) {
		/* A zeroed request with closing=1 frees the rx ring. */
		struct tpacket_req req;
		memset(&req, 0, sizeof(req));
		packet_set_ring(sk, &req, 1);
	}
#endif

	/*
	 *	Now the socket is dead. No more input will appear.
	 */

	sock_orphan(sk);
	sock->sk = NULL;

	/* Purge queues */

	skb_queue_purge(&sk->sk_receive_queue);

	sock_put(sk);
	return 0;
}
874
875/*
876 * Attach a packet hook.
877 */
878
/*
 *	Attach a packet hook: (re)bind the socket to @dev and @protocol.
 *	A protocol of 0 leaves the socket unhooked.  Always returns 0;
 *	a down device is reported asynchronously via sk_err/ENETDOWN.
 */
static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
{
	struct packet_sock *po = pkt_sk(sk);
	/*
	 *	Detach an existing hook if present.
	 */

	lock_sock(sk);

	spin_lock(&po->bind_lock);
	if (po->running) {
		__sock_put(sk);
		po->running = 0;
		po->num = 0;
		/* dev_remove_pack() can sleep/synchronize, so the
		 * spinlock is dropped around it and retaken after. */
		spin_unlock(&po->bind_lock);
		dev_remove_pack(&po->prot_hook);
		spin_lock(&po->bind_lock);
	}

	po->num = protocol;
	po->prot_hook.type = protocol;
	po->prot_hook.dev = dev;

	po->ifindex = dev ? dev->ifindex : 0;

	if (protocol == 0)
		goto out_unlock;	/* bound to no protocol: stay unhooked */

	if (dev) {
		if (dev->flags&IFF_UP) {
			dev_add_pack(&po->prot_hook);
			sock_hold(sk);	/* the hook holds a socket reference */
			po->running = 1;
		} else {
			/* Device is down: report ENETDOWN to the owner. */
			sk->sk_err = ENETDOWN;
			if (!sock_flag(sk, SOCK_DEAD))
				sk->sk_error_report(sk);
		}
	} else {
		/* No device: listen on all interfaces. */
		dev_add_pack(&po->prot_hook);
		sock_hold(sk);
		po->running = 1;
	}

out_unlock:
	spin_unlock(&po->bind_lock);
	release_sock(sk);
	return 0;
}
928
929/*
930 * Bind a packet socket to a device
931 */
932
933#ifdef CONFIG_SOCK_PACKET
934
935static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len)
936{
937 struct sock *sk=sock->sk;
938 char name[15];
939 struct net_device *dev;
940 int err = -ENODEV;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900941
Linus Torvalds1da177e2005-04-16 15:20:36 -0700942 /*
943 * Check legality
944 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900945
Kris Katterjohn8ae55f02006-01-23 16:28:02 -0800946 if (addr_len != sizeof(struct sockaddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700947 return -EINVAL;
948 strlcpy(name,uaddr->sa_data,sizeof(name));
949
950 dev = dev_get_by_name(name);
951 if (dev) {
952 err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
953 dev_put(dev);
954 }
955 return err;
956}
957#endif
958
959static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
960{
961 struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
962 struct sock *sk=sock->sk;
963 struct net_device *dev = NULL;
964 int err;
965
966
967 /*
968 * Check legality
969 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900970
Linus Torvalds1da177e2005-04-16 15:20:36 -0700971 if (addr_len < sizeof(struct sockaddr_ll))
972 return -EINVAL;
973 if (sll->sll_family != AF_PACKET)
974 return -EINVAL;
975
976 if (sll->sll_ifindex) {
977 err = -ENODEV;
978 dev = dev_get_by_index(sll->sll_ifindex);
979 if (dev == NULL)
980 goto out;
981 }
982 err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
983 if (dev)
984 dev_put(dev);
985
986out:
987 return err;
988}
989
/* Protocol descriptor handed to sk_alloc(); obj_size makes the
 * allocator carve out a full packet_sock per socket. */
static struct proto packet_proto = {
	.name	  = "PACKET",
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct packet_sock),
};
995
996/*
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900997 * Create a packet of type SOCK_PACKET.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700998 */
999
/*
 *	Create a packet socket of type SOCK_RAW, SOCK_DGRAM or (when
 *	enabled) SOCK_PACKET.  Requires CAP_NET_RAW.  A non-zero
 *	protocol immediately attaches the receive hook.
 */
static int packet_create(struct socket *sock, int protocol)
{
	struct sock *sk;
	struct packet_sock *po;
	__be16 proto = (__force __be16)protocol; /* weird, but documented */
	int err;

	if (!capable(CAP_NET_RAW))
		return -EPERM;
	if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW
#ifdef CONFIG_SOCK_PACKET
	    && sock->type != SOCK_PACKET
#endif
	    )
		return -ESOCKTNOSUPPORT;

	sock->state = SS_UNCONNECTED;

	err = -ENOBUFS;
	sk = sk_alloc(PF_PACKET, GFP_KERNEL, &packet_proto, 1);
	if (sk == NULL)
		goto out;

	/* Legacy SOCK_PACKET sockets get their own ops table. */
	sock->ops = &packet_ops;
#ifdef CONFIG_SOCK_PACKET
	if (sock->type == SOCK_PACKET)
		sock->ops = &packet_ops_spkt;
#endif
	sock_init_data(sock, sk);

	po = pkt_sk(sk);
	sk->sk_family = PF_PACKET;
	po->num = proto;

	sk->sk_destruct = packet_sock_destruct;
	atomic_inc(&packet_socks_nr);

	/*
	 *	Attach a protocol block
	 */

	spin_lock_init(&po->bind_lock);
	po->prot_hook.func = packet_rcv;
#ifdef CONFIG_SOCK_PACKET
	if (sock->type == SOCK_PACKET)
		po->prot_hook.func = packet_rcv_spkt;
#endif
	po->prot_hook.af_packet_priv = sk;

	if (proto) {
		/* Protocol given at create time: hook up right away.
		 * The hook holds a reference on the socket. */
		po->prot_hook.type = proto;
		dev_add_pack(&po->prot_hook);
		sock_hold(sk);
		po->running = 1;
	}

	/* Register on the global list of packet sockets. */
	write_lock_bh(&packet_sklist_lock);
	sk_add_node(sk, &packet_sklist);
	write_unlock_bh(&packet_sklist_lock);
	return(0);
out:
	return err;
}
1063
1064/*
1065 * Pull a packet from our receive queue and hand it to the user.
1066 * If necessary we block.
1067 */
1068
/*
 *	recvmsg() for all PF_PACKET socket types.  Dequeues one skb via the
 *	generic datagram code, copies it (possibly truncated) to user space,
 *	fills in the source address and, if PACKET_AUXDATA is enabled,
 *	attaches a tpacket_auxdata control message.
 */
static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
			  struct msghdr *msg, size_t len, int flags)
{
	struct sock *sk = sock->sk;
	struct sk_buff *skb;
	int copied, err;
	struct sockaddr_ll *sll;

	err = -EINVAL;
	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
		goto out;

#if 0
	/* What error should we return now? EUNATTACH? */
	if (pkt_sk(sk)->ifindex < 0)
		return -ENODEV;
#endif

	/*
	 *	Call the generic datagram receiver. This handles all sorts
	 *	of horrible races and re-entrancy so we can forget about it
	 *	in the protocol layers.
	 *
	 *	Now it will return ENETDOWN, if device have just gone down,
	 *	but then it will block.
	 */

	skb=skb_recv_datagram(sk,flags,flags&MSG_DONTWAIT,&err);

	/*
	 *	An error occurred so return it. Because skb_recv_datagram()
	 *	handles the blocking we don't see and worry about blocking
	 *	retries.
	 */

	if (skb == NULL)
		goto out;

	/*
	 *	If the address length field is there to be filled in, we fill
	 *	it in now.
	 */

	/* The address was stashed in the skb control block on receive. */
	sll = &PACKET_SKB_CB(skb)->sa.ll;
	if (sock->type == SOCK_PACKET)
		msg->msg_namelen = sizeof(struct sockaddr_pkt);
	else
		msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);

	/*
	 *	You lose any data beyond the buffer you gave. If it worries a
	 *	user program they can ask the device for its MTU anyway.
	 */

	copied = skb->len;
	if (copied > len)
	{
		copied=len;
		msg->msg_flags|=MSG_TRUNC;
	}

	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
	if (err)
		goto out_free;

	sock_recv_timestamp(msg, sk, skb);

	if (msg->msg_name)
		memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
		       msg->msg_namelen);

	if (pkt_sk(sk)->auxdata) {
		struct tpacket_auxdata aux;

		aux.tp_status = TP_STATUS_USER;
		/* Partial checksums haven't been finalized by hardware yet. */
		if (skb->ip_summed == CHECKSUM_PARTIAL)
			aux.tp_status |= TP_STATUS_CSUMNOTREADY;
		aux.tp_len = PACKET_SKB_CB(skb)->origlen;
		aux.tp_snaplen = skb->len;
		aux.tp_mac = 0;
		aux.tp_net = skb->nh.raw - skb->data;

		put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
	}

	/*
	 *	Free or return the buffer as appropriate. Again this
	 *	hides all the races and re-entrancy issues from us.
	 */
	/* On success report the full length when MSG_TRUNC was requested. */
	err = (flags&MSG_TRUNC) ? skb->len : copied;

out_free:
	skb_free_datagram(sk, skb);
out:
	return err;
}
1165
1166#ifdef CONFIG_SOCK_PACKET
1167static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
1168 int *uaddr_len, int peer)
1169{
1170 struct net_device *dev;
1171 struct sock *sk = sock->sk;
1172
1173 if (peer)
1174 return -EOPNOTSUPP;
1175
1176 uaddr->sa_family = AF_PACKET;
1177 dev = dev_get_by_index(pkt_sk(sk)->ifindex);
1178 if (dev) {
1179 strlcpy(uaddr->sa_data, dev->name, 15);
1180 dev_put(dev);
1181 } else
1182 memset(uaddr->sa_data, 0, 14);
1183 *uaddr_len = sizeof(*uaddr);
1184
1185 return 0;
1186}
1187#endif
1188
/*
 *	getsockname() for AF_PACKET sockets: fill in a sockaddr_ll with the
 *	bound ifindex/protocol and, if the device still exists, its hardware
 *	type and address.  getpeername() (peer != 0) is not supported.
 */
static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
			  int *uaddr_len, int peer)
{
	struct net_device *dev;
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;

	if (peer)
		return -EOPNOTSUPP;

	sll->sll_family = AF_PACKET;
	sll->sll_ifindex = po->ifindex;
	sll->sll_protocol = po->num;
	dev = dev_get_by_index(po->ifindex);
	if (dev) {
		sll->sll_hatype = dev->type;
		sll->sll_halen = dev->addr_len;
		memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
		dev_put(dev);
	} else {
		sll->sll_hatype = 0;	/* Bad: we have no ARPHRD_UNSPEC */
		sll->sll_halen = 0;
	}
	/* Report only the bytes actually filled in, not sizeof(*sll). */
	*uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;

	return 0;
}
1217
1218#ifdef CONFIG_PACKET_MULTICAST
1219static void packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int what)
1220{
1221 switch (i->type) {
1222 case PACKET_MR_MULTICAST:
1223 if (what > 0)
1224 dev_mc_add(dev, i->addr, i->alen, 0);
1225 else
1226 dev_mc_delete(dev, i->addr, i->alen, 0);
1227 break;
1228 case PACKET_MR_PROMISC:
1229 dev_set_promiscuity(dev, what);
1230 break;
1231 case PACKET_MR_ALLMULTI:
1232 dev_set_allmulti(dev, what);
1233 break;
1234 default:;
1235 }
1236}
1237
1238static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
1239{
1240 for ( ; i; i=i->next) {
1241 if (i->ifindex == dev->ifindex)
1242 packet_dev_mc(dev, i, what);
1243 }
1244}
1245
/*
 *	PACKET_ADD_MEMBERSHIP: add a multicast/promisc/allmulti membership
 *	for this socket.  Duplicate entries are reference-counted rather
 *	than added twice.  Runs under rtnl_lock to serialize with device
 *	state changes.  Returns 0 or a negative errno.
 */
static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
{
	struct packet_sock *po = pkt_sk(sk);
	struct packet_mclist *ml, *i;
	struct net_device *dev;
	int err;

	rtnl_lock();

	err = -ENODEV;
	/* __dev_get_by_index: no refcount taken — safe because we hold rtnl. */
	dev = __dev_get_by_index(mreq->mr_ifindex);
	if (!dev)
		goto done;

	err = -EINVAL;
	if (mreq->mr_alen > dev->addr_len)
		goto done;

	/* Allocate up front so the list scan below can't fail mid-way. */
	err = -ENOBUFS;
	i = kmalloc(sizeof(*i), GFP_KERNEL);
	if (i == NULL)
		goto done;

	err = 0;
	for (ml = po->mclist; ml; ml = ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			ml->count++;
			/* Free the new element ... */
			kfree(i);
			goto done;
		}
	}

	i->type = mreq->mr_type;
	i->ifindex = mreq->mr_ifindex;
	i->alen = mreq->mr_alen;
	memcpy(i->addr, mreq->mr_address, i->alen);
	i->count = 1;
	i->next = po->mclist;
	po->mclist = i;
	packet_dev_mc(dev, i, +1);

done:
	rtnl_unlock();
	return err;
}
1295
/*
 *	PACKET_DROP_MEMBERSHIP: decrement a membership's refcount and, when
 *	it reaches zero, unlink it and withdraw it from the device.
 *	Returns 0 on success, -EADDRNOTAVAIL if no matching entry exists.
 */
static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
{
	struct packet_mclist *ml, **mlp;

	rtnl_lock();

	/* mlp tracks the link pointing at ml so unlinking is O(1). */
	for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
		if (ml->ifindex == mreq->mr_ifindex &&
		    ml->type == mreq->mr_type &&
		    ml->alen == mreq->mr_alen &&
		    memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
			if (--ml->count == 0) {
				struct net_device *dev;
				*mlp = ml->next;
				/* Device may already be gone; only then skip
				 * the hardware-side withdrawal. */
				dev = dev_get_by_index(ml->ifindex);
				if (dev) {
					packet_dev_mc(dev, ml, -1);
					dev_put(dev);
				}
				kfree(ml);
			}
			rtnl_unlock();
			return 0;
		}
	}
	rtnl_unlock();
	return -EADDRNOTAVAIL;
}
1324
1325static void packet_flush_mclist(struct sock *sk)
1326{
1327 struct packet_sock *po = pkt_sk(sk);
1328 struct packet_mclist *ml;
1329
1330 if (!po->mclist)
1331 return;
1332
1333 rtnl_lock();
1334 while ((ml = po->mclist) != NULL) {
1335 struct net_device *dev;
1336
1337 po->mclist = ml->next;
1338 if ((dev = dev_get_by_index(ml->ifindex)) != NULL) {
1339 packet_dev_mc(dev, ml, -1);
1340 dev_put(dev);
1341 }
1342 kfree(ml);
1343 }
1344 rtnl_unlock();
1345}
1346#endif
1347
/*
 *	setsockopt() at SOL_PACKET level.  Supports membership management,
 *	the RX ring (mmap), copy threshold and the auxdata flag; anything
 *	else is -ENOPROTOOPT.
 */
static int
packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	int ret;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	switch(optname) {
#ifdef CONFIG_PACKET_MULTICAST
	case PACKET_ADD_MEMBERSHIP:
	case PACKET_DROP_MEMBERSHIP:
	{
		struct packet_mreq_max mreq;
		int len = optlen;
		/* Zero-fill so a short (legacy packet_mreq) copy leaves the
		 * tail of the _max structure in a defined state. */
		memset(&mreq, 0, sizeof(mreq));
		if (len < sizeof(struct packet_mreq))
			return -EINVAL;
		if (len > sizeof(mreq))
			len = sizeof(mreq);
		if (copy_from_user(&mreq,optval,len))
			return -EFAULT;
		/* The claimed address length must fit in what was copied. */
		if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
			return -EINVAL;
		if (optname == PACKET_ADD_MEMBERSHIP)
			ret = packet_mc_add(sk, &mreq);
		else
			ret = packet_mc_drop(sk, &mreq);
		return ret;
	}
#endif
#ifdef CONFIG_PACKET_MMAP
	case PACKET_RX_RING:
	{
		struct tpacket_req req;

		if (optlen<sizeof(req))
			return -EINVAL;
		if (copy_from_user(&req,optval,sizeof(req)))
			return -EFAULT;
		return packet_set_ring(sk, &req, 0);
	}
	case PACKET_COPY_THRESH:
	{
		int val;

		if (optlen!=sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val,optval,sizeof(val)))
			return -EFAULT;

		pkt_sk(sk)->copy_thresh = val;
		return 0;
	}
#endif
	case PACKET_AUXDATA:
	{
		int val;

		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		/* Normalize to 0/1. */
		po->auxdata = !!val;
		return 0;
	}
	default:
		return -ENOPROTOOPT;
	}
}
1421
/*
 *	getsockopt() at SOL_PACKET level.  PACKET_STATISTICS reads and
 *	atomically resets the drop/packet counters; PACKET_AUXDATA reports
 *	the auxdata flag.  The reply is clamped to the caller's buffer.
 */
static int packet_getsockopt(struct socket *sock, int level, int optname,
			     char __user *optval, int __user *optlen)
{
	int len;
	int val;
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	void *data;
	struct tpacket_stats st;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < 0)
		return -EINVAL;

	switch(optname) {
	case PACKET_STATISTICS:
		if (len > sizeof(struct tpacket_stats))
			len = sizeof(struct tpacket_stats);
		/* Snapshot + reset under the receive-queue lock, which is
		 * what the receive path holds while updating po->stats. */
		spin_lock_bh(&sk->sk_receive_queue.lock);
		st = po->stats;
		memset(&po->stats, 0, sizeof(st));
		spin_unlock_bh(&sk->sk_receive_queue.lock);
		/* tp_packets reports everything seen, drops included. */
		st.tp_packets += st.tp_drops;

		data = &st;
		break;
	case PACKET_AUXDATA:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->auxdata;

		data = &val;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen))
		return -EFAULT;
	if (copy_to_user(optval, data, len))
		return -EFAULT;
	return 0;
}
1470
1471
/*
 *	netdevice notifier: reacts to interface state changes for every
 *	packet socket bound to the affected device.  On DOWN/UNREGISTER the
 *	receive hook is unhooked (and the socket poked with ENETDOWN); on UP
 *	the hook is re-registered for sockets that were bound by protocol.
 */
static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
{
	struct sock *sk;
	struct hlist_node *node;
	struct net_device *dev = (struct net_device*)data;

	read_lock(&packet_sklist_lock);
	sk_for_each(sk, node, &packet_sklist) {
		struct packet_sock *po = pkt_sk(sk);

		switch (msg) {
		case NETDEV_UNREGISTER:
#ifdef CONFIG_PACKET_MULTICAST
			/* Withdraw any memberships on the vanishing device. */
			if (po->mclist)
				packet_dev_mclist(dev, po->mclist, -1);
			// fallthrough
#endif
		case NETDEV_DOWN:
			if (dev->ifindex == po->ifindex) {
				spin_lock(&po->bind_lock);
				if (po->running) {
					/* Drop the hook and the reference it
					 * held on the socket. */
					__dev_remove_pack(&po->prot_hook);
					__sock_put(sk);
					po->running = 0;
					sk->sk_err = ENETDOWN;
					if (!sock_flag(sk, SOCK_DEAD))
						sk->sk_error_report(sk);
				}
				if (msg == NETDEV_UNREGISTER) {
					/* Device is gone for good; forget it. */
					po->ifindex = -1;
					po->prot_hook.dev = NULL;
				}
				spin_unlock(&po->bind_lock);
			}
			break;
		case NETDEV_UP:
			spin_lock(&po->bind_lock);
			if (dev->ifindex == po->ifindex && po->num &&
			    !po->running) {
				dev_add_pack(&po->prot_hook);
				sock_hold(sk);
				po->running = 1;
			}
			spin_unlock(&po->bind_lock);
			break;
		}
	}
	read_unlock(&packet_sklist_lock);
	return NOTIFY_DONE;
}
1522
1523
/*
 *	ioctl() for packet sockets: queue-size queries (SIOCOUTQ/SIOCINQ),
 *	timestamping, and pass-through of routing/ARP/interface ioctls to
 *	the inet layer when CONFIG_INET is set.
 */
static int packet_ioctl(struct socket *sock, unsigned int cmd,
			unsigned long arg)
{
	struct sock *sk = sock->sk;

	switch(cmd) {
	case SIOCOUTQ:
	{
		int amount = atomic_read(&sk->sk_wmem_alloc);
		return put_user(amount, (int __user *)arg);
	}
	case SIOCINQ:
	{
		struct sk_buff *skb;
		int amount = 0;

		/* Reports the size of the *next* packet, not the queue total. */
		spin_lock_bh(&sk->sk_receive_queue.lock);
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			amount = skb->len;
		spin_unlock_bh(&sk->sk_receive_queue.lock);
		return put_user(amount, (int __user *)arg);
	}
	case SIOCGSTAMP:
		return sock_get_timestamp(sk, (struct timeval __user *)arg);

#ifdef CONFIG_INET
	case SIOCADDRT:
	case SIOCDELRT:
	case SIOCDARP:
	case SIOCGARP:
	case SIOCSARP:
	case SIOCGIFADDR:
	case SIOCSIFADDR:
	case SIOCGIFBRDADDR:
	case SIOCSIFBRDADDR:
	case SIOCGIFNETMASK:
	case SIOCSIFNETMASK:
	case SIOCGIFDSTADDR:
	case SIOCSIFDSTADDR:
	case SIOCSIFFLAGS:
		return inet_dgram_ops.ioctl(sock, cmd, arg);
#endif

	default:
		return -ENOIOCTLCMD;
	}
	return 0;
}
1573
1574#ifndef CONFIG_PACKET_MMAP
1575#define packet_mmap sock_no_mmap
1576#define packet_poll datagram_poll
1577#else
1578
/*
 *	poll() for mmap-enabled sockets: in addition to the generic datagram
 *	readiness, report POLLIN when the next-to-read RX-ring frame has a
 *	non-kernel status (i.e. it has been handed to user space).
 */
static unsigned int packet_poll(struct file * file, struct socket *sock,
				poll_table *wait)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned int mask = datagram_poll(file, sock, wait);

	spin_lock_bh(&sk->sk_receive_queue.lock);
	if (po->pg_vec) {
		/* po->head is the next slot to fill, so the most recently
		 * filled frame is head-1 (wrapping to frame_max). */
		unsigned last = po->head ? po->head-1 : po->frame_max;
		struct tpacket_hdr *h;

		h = (struct tpacket_hdr *)packet_lookup_frame(po, last);

		if (h->tp_status)
			mask |= POLLIN | POLLRDNORM;
	}
	spin_unlock_bh(&sk->sk_receive_queue.lock);
	return mask;
}
1599
1600
1601/* Dirty? Well, I still did not learn better way to account
1602 * for user mmaps.
1603 */
1604
1605static void packet_mm_open(struct vm_area_struct *vma)
1606{
1607 struct file *file = vma->vm_file;
Eric Dumazetb69aee02005-09-06 14:42:45 -07001608 struct socket * sock = file->private_data;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001609 struct sock *sk = sock->sk;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001610
Linus Torvalds1da177e2005-04-16 15:20:36 -07001611 if (sk)
1612 atomic_inc(&pkt_sk(sk)->mapped);
1613}
1614
1615static void packet_mm_close(struct vm_area_struct *vma)
1616{
1617 struct file *file = vma->vm_file;
Eric Dumazetb69aee02005-09-06 14:42:45 -07001618 struct socket * sock = file->private_data;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001619 struct sock *sk = sock->sk;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001620
Linus Torvalds1da177e2005-04-16 15:20:36 -07001621 if (sk)
1622 atomic_dec(&pkt_sk(sk)->mapped);
1623}
1624
/* VMA callbacks for the mmap'ed RX ring; they maintain po->mapped so
 * packet_set_ring() can refuse to tear down a ring still in use. */
static struct vm_operations_struct packet_mmap_ops = {
	.open =	packet_mm_open,
	.close =packet_mm_close,
};
1629
/* Return the page struct of the last page in one pg_vec block
 * (a block spans PAGE_SIZE << order bytes). */
static inline struct page *pg_vec_endpage(char *one_pg_vec, unsigned int order)
{
	return virt_to_page(one_pg_vec + (PAGE_SIZE << order) - 1);
}
1634
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001635static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001636{
1637 int i;
1638
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001639 for (i = 0; i < len; i++) {
1640 if (likely(pg_vec[i]))
1641 free_pages((unsigned long) pg_vec[i], order);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001642 }
1643 kfree(pg_vec);
1644}
1645
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001646static inline char *alloc_one_pg_vec_page(unsigned long order)
1647{
1648 return (char *) __get_free_pages(GFP_KERNEL | __GFP_COMP | __GFP_ZERO,
1649 order);
1650}
1651
/*
 *	Allocate the RX-ring page vector: tp_block_nr blocks of 2^order
 *	pages each.  Returns NULL on failure, with any partial allocation
 *	released via free_pg_vec().
 *
 *	NOTE(review): block_nr * sizeof(char *) could overflow on 32-bit if
 *	tp_block_nr is huge — presumably bounded by the caller's sanity
 *	checks in packet_set_ring(); verify.
 */
static char **alloc_pg_vec(struct tpacket_req *req, int order)
{
	unsigned int block_nr = req->tp_block_nr;
	char **pg_vec;
	int i;

	pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL);
	if (unlikely(!pg_vec))
		goto out;

	for (i = 0; i < block_nr; i++) {
		pg_vec[i] = alloc_one_pg_vec_page(order);
		if (unlikely(!pg_vec[i]))
			goto out_free_pgvec;
	}

out:
	return pg_vec;

out_free_pgvec:
	/* kzalloc'd vector: unfilled slots are NULL, free_pg_vec skips them. */
	free_pg_vec(pg_vec, order, block_nr);
	pg_vec = NULL;
	goto out;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001676
1677static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
1678{
1679 char **pg_vec = NULL;
1680 struct packet_sock *po = pkt_sk(sk);
Al Viro0e11c912006-11-08 00:26:29 -08001681 int was_running, order = 0;
1682 __be16 num;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001683 int err = 0;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001684
Linus Torvalds1da177e2005-04-16 15:20:36 -07001685 if (req->tp_block_nr) {
1686 int i, l;
1687
1688 /* Sanity tests and some calculations */
1689
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001690 if (unlikely(po->pg_vec))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001691 return -EBUSY;
1692
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001693 if (unlikely((int)req->tp_block_size <= 0))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001694 return -EINVAL;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001695 if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001696 return -EINVAL;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001697 if (unlikely(req->tp_frame_size < TPACKET_HDRLEN))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001698 return -EINVAL;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001699 if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001700 return -EINVAL;
1701
1702 po->frames_per_block = req->tp_block_size/req->tp_frame_size;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001703 if (unlikely(po->frames_per_block <= 0))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001704 return -EINVAL;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001705 if (unlikely((po->frames_per_block * req->tp_block_nr) !=
1706 req->tp_frame_nr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001707 return -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001708
1709 err = -ENOMEM;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001710 order = get_order(req->tp_block_size);
1711 pg_vec = alloc_pg_vec(req, order);
1712 if (unlikely(!pg_vec))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001713 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001714
1715 l = 0;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001716 for (i = 0; i < req->tp_block_nr; i++) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001717 char *ptr = pg_vec[i];
1718 struct tpacket_hdr *header;
1719 int k;
1720
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001721 for (k = 0; k < po->frames_per_block; k++) {
1722 header = (struct tpacket_hdr *) ptr;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001723 header->tp_status = TP_STATUS_KERNEL;
1724 ptr += req->tp_frame_size;
1725 }
1726 }
1727 /* Done */
1728 } else {
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001729 if (unlikely(req->tp_frame_nr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001730 return -EINVAL;
1731 }
1732
1733 lock_sock(sk);
1734
1735 /* Detach socket from network */
1736 spin_lock(&po->bind_lock);
1737 was_running = po->running;
1738 num = po->num;
1739 if (was_running) {
1740 __dev_remove_pack(&po->prot_hook);
1741 po->num = 0;
1742 po->running = 0;
1743 __sock_put(sk);
1744 }
1745 spin_unlock(&po->bind_lock);
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001746
Linus Torvalds1da177e2005-04-16 15:20:36 -07001747 synchronize_net();
1748
1749 err = -EBUSY;
1750 if (closing || atomic_read(&po->mapped) == 0) {
1751 err = 0;
1752#define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
1753
1754 spin_lock_bh(&sk->sk_receive_queue.lock);
1755 pg_vec = XC(po->pg_vec, pg_vec);
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001756 po->frame_max = (req->tp_frame_nr - 1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001757 po->head = 0;
1758 po->frame_size = req->tp_frame_size;
1759 spin_unlock_bh(&sk->sk_receive_queue.lock);
1760
1761 order = XC(po->pg_vec_order, order);
1762 req->tp_block_nr = XC(po->pg_vec_len, req->tp_block_nr);
1763
1764 po->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
1765 po->prot_hook.func = po->pg_vec ? tpacket_rcv : packet_rcv;
1766 skb_queue_purge(&sk->sk_receive_queue);
1767#undef XC
1768 if (atomic_read(&po->mapped))
1769 printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped));
1770 }
1771
1772 spin_lock(&po->bind_lock);
1773 if (was_running && !po->running) {
1774 sock_hold(sk);
1775 po->running = 1;
1776 po->num = num;
1777 dev_add_pack(&po->prot_hook);
1778 }
1779 spin_unlock(&po->bind_lock);
1780
1781 release_sock(sk);
1782
Linus Torvalds1da177e2005-04-16 15:20:36 -07001783 if (pg_vec)
1784 free_pg_vec(pg_vec, order, req->tp_block_nr);
1785out:
1786 return err;
1787}
1788
/*
 *	mmap() the RX ring into user space.  The mapping must start at
 *	offset 0 and cover the whole ring exactly; each ring block is
 *	inserted page by page with vm_insert_page().
 */
static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned long size;
	unsigned long start;
	int err = -EINVAL;
	int i;

	if (vma->vm_pgoff)
		return -EINVAL;

	size = vma->vm_end - vma->vm_start;

	lock_sock(sk);
	if (po->pg_vec == NULL)
		goto out;
	/* Partial mappings are not supported. */
	if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE)
		goto out;

	start = vma->vm_start;
	for (i = 0; i < po->pg_vec_len; i++) {
		struct page *page = virt_to_page(po->pg_vec[i]);
		int pg_num;

		for (pg_num = 0; pg_num < po->pg_vec_pages; pg_num++, page++) {
			err = vm_insert_page(vma, start, page);
			if (unlikely(err))
				goto out;
			start += PAGE_SIZE;
		}
	}
	/* Count the mapping; packet_mm_open/close keep it up to date. */
	atomic_inc(&po->mapped);
	vma->vm_ops = &packet_mmap_ops;
	err = 0;

out:
	release_sock(sk);
	return err;
}
1829#endif
1830
1831
1832#ifdef CONFIG_SOCK_PACKET
/* proto_ops for the legacy SOCK_PACKET type: no setsockopt/getsockopt,
 * no mmap, and the spkt-specific bind/getname/sendmsg handlers. */
static const struct proto_ops packet_ops_spkt = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind_spkt,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname_spkt,
	.poll =		datagram_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	packet_sendmsg_spkt,
	.recvmsg =	packet_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};
1853#endif
1854
/* proto_ops for SOCK_RAW/SOCK_DGRAM packet sockets: full sockopt,
 * poll and mmap support (packet_poll/packet_mmap when CONFIG_PACKET_MMAP). */
static const struct proto_ops packet_ops = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname,
	.poll =		packet_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	packet_setsockopt,
	.getsockopt =	packet_getsockopt,
	.sendmsg =	packet_sendmsg,
	.recvmsg =	packet_recvmsg,
	.mmap =		packet_mmap,
	.sendpage =	sock_no_sendpage,
};
1875
/* Registered with sock_register(): routes socket(PF_PACKET, ...) calls
 * to packet_create(). */
static struct net_proto_family packet_family_ops = {
	.family =	PF_PACKET,
	.create =	packet_create,
	.owner	=	THIS_MODULE,
};
1881
/* Netdevice event subscription; handler detaches/re-attaches bound sockets. */
static struct notifier_block packet_netdev_notifier = {
	.notifier_call =packet_notifier,
};
1885
1886#ifdef CONFIG_PROC_FS
1887static inline struct sock *packet_seq_idx(loff_t off)
1888{
1889 struct sock *s;
1890 struct hlist_node *node;
1891
1892 sk_for_each(s, node, &packet_sklist) {
1893 if (!off--)
1894 return s;
1895 }
1896 return NULL;
1897}
1898
/* seq_file start: take the list lock (released in packet_seq_stop) and
 * position at *pos; pos 0 yields the header token. */
static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
{
	read_lock(&packet_sklist_lock);
	return *pos ? packet_seq_idx(*pos - 1) : SEQ_START_TOKEN;
}
1904
1905static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1906{
1907 ++*pos;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001908 return (v == SEQ_START_TOKEN)
1909 ? sk_head(&packet_sklist)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001910 : sk_next((struct sock*)v) ;
1911}
1912
/* seq_file stop: drop the lock taken in packet_seq_start. */
static void packet_seq_stop(struct seq_file *seq, void *v)
{
	read_unlock(&packet_sklist_lock);
}
1917
/* seq_file show: emit the /proc/net/packet header line or one socket row. */
static int packet_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "sk RefCnt Type Proto Iface R Rmem User Inode\n");
	else {
		struct sock *s = v;
		const struct packet_sock *po = pkt_sk(s);

		/* po->num is stored big-endian; convert for display. */
		seq_printf(seq,
			   "%p %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n",
			   s,
			   atomic_read(&s->sk_refcnt),
			   s->sk_type,
			   ntohs(po->num),
			   po->ifindex,
			   po->running,
			   atomic_read(&s->sk_rmem_alloc),
			   sock_i_uid(s),
			   sock_i_ino(s) );
	}

	return 0;
}
1941
/* Iterator callbacks for /proc/net/packet. */
static struct seq_operations packet_seq_ops = {
	.start	= packet_seq_start,
	.next	= packet_seq_next,
	.stop	= packet_seq_stop,
	.show	= packet_seq_show,
};
1948
/* open() for /proc/net/packet: hook up the seq_file iterator. */
static int packet_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &packet_seq_ops);
}
1953
/* file_operations for /proc/net/packet, backed by the seq_file helpers. */
static const struct file_operations packet_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= packet_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release,
};
1961
1962#endif
1963
/* Module unload: unwind packet_init() registrations in reverse order. */
static void __exit packet_exit(void)
{
	proc_net_remove("packet");
	unregister_netdevice_notifier(&packet_netdev_notifier);
	sock_unregister(PF_PACKET);
	proto_unregister(&packet_proto);
}
1971
/*
 *	Module init: register the proto, the PF_PACKET family, the netdev
 *	notifier and the /proc entry.
 *
 *	NOTE(review): the sock_register / register_netdevice_notifier /
 *	proc_net_fops_create return values are not checked here — presumably
 *	acceptable for this era of the code; verify against current practice.
 */
static int __init packet_init(void)
{
	int rc = proto_register(&packet_proto, 0);

	if (rc != 0)
		goto out;

	sock_register(&packet_family_ops);
	register_netdevice_notifier(&packet_netdev_notifier);
	proc_net_fops_create("packet", 0, &packet_seq_fops);
out:
	return rc;
}
1985
1986module_init(packet_init);
1987module_exit(packet_exit);
1988MODULE_LICENSE("GPL");
1989MODULE_ALIAS_NETPROTO(PF_PACKET);