blob: eb6be5030c70ea7e0bb8b28af520e34fa4f4805a [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * PACKET - implements raw packet sockets.
7 *
8 * Version: $Id: af_packet.c,v 1.61 2002/02/08 03:57:19 davem Exp $
9 *
Jesper Juhl02c30a82005-05-05 16:16:16 -070010 * Authors: Ross Biro
Linus Torvalds1da177e2005-04-16 15:20:36 -070011 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
12 * Alan Cox, <gw4pts@gw4pts.ampr.org>
13 *
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090014 * Fixes:
Linus Torvalds1da177e2005-04-16 15:20:36 -070015 * Alan Cox : verify_area() now used correctly
16 * Alan Cox : new skbuff lists, look ma no backlogs!
17 * Alan Cox : tidied skbuff lists.
18 * Alan Cox : Now uses generic datagram routines I
19 * added. Also fixed the peek/read crash
20 * from all old Linux datagram code.
21 * Alan Cox : Uses the improved datagram code.
22 * Alan Cox : Added NULL's for socket options.
23 * Alan Cox : Re-commented the code.
24 * Alan Cox : Use new kernel side addressing
25 * Rob Janssen : Correct MTU usage.
26 * Dave Platt : Counter leaks caused by incorrect
27 * interrupt locking and some slightly
28 * dubious gcc output. Can you read
29 * compiler: it said _VOLATILE_
30 * Richard Kooijman : Timestamp fixes.
31 * Alan Cox : New buffers. Use sk->mac.raw.
32 * Alan Cox : sendmsg/recvmsg support.
33 * Alan Cox : Protocol setting support
34 * Alexey Kuznetsov : Untied from IPv4 stack.
35 * Cyrus Durgin : Fixed kerneld for kmod.
36 * Michal Ostrowski : Module initialization cleanup.
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090037 * Ulises Alonso : Frame number limit removal and
Linus Torvalds1da177e2005-04-16 15:20:36 -070038 * packet_set_ring memory leak.
Eric W. Biederman0fb375f2005-09-21 00:11:37 -070039 * Eric Biederman : Allow for > 8 byte hardware addresses.
40 * The convention is that longer addresses
41 * will simply extend the hardware address
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090042 * byte arrays at the end of sockaddr_ll
Eric W. Biederman0fb375f2005-09-21 00:11:37 -070043 * and packet_mreq.
Linus Torvalds1da177e2005-04-16 15:20:36 -070044 *
45 * This program is free software; you can redistribute it and/or
46 * modify it under the terms of the GNU General Public License
47 * as published by the Free Software Foundation; either version
48 * 2 of the License, or (at your option) any later version.
49 *
50 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090051
Linus Torvalds1da177e2005-04-16 15:20:36 -070052#include <linux/types.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070053#include <linux/mm.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080054#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070055#include <linux/fcntl.h>
56#include <linux/socket.h>
57#include <linux/in.h>
58#include <linux/inet.h>
59#include <linux/netdevice.h>
60#include <linux/if_packet.h>
61#include <linux/wireless.h>
Herbert Xuffbc6112007-02-04 23:33:10 -080062#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070063#include <linux/kmod.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020064#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070065#include <net/ip.h>
66#include <net/protocol.h>
67#include <linux/skbuff.h>
68#include <net/sock.h>
69#include <linux/errno.h>
70#include <linux/timer.h>
71#include <asm/system.h>
72#include <asm/uaccess.h>
73#include <asm/ioctls.h>
74#include <asm/page.h>
Al Viroa1f8e7f2006-10-19 16:08:53 -040075#include <asm/cacheflush.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070076#include <asm/io.h>
77#include <linux/proc_fs.h>
78#include <linux/seq_file.h>
79#include <linux/poll.h>
80#include <linux/module.h>
81#include <linux/init.h>
82
83#ifdef CONFIG_INET
84#include <net/inet_common.h>
85#endif
86
/*
   Assumptions:
   - if device has no dev->hard_header routine, it adds and removes ll header
     inside itself. In this case ll header is invisible outside of device,
     but higher levels still should reserve dev->hard_header_len.
     Some devices are clever enough to reallocate skb when the header
     will not fit in the reserved space (tunnel); others are silly
     (PPP).
   - packet socket receives packets with pulled ll header,
     so that SOCK_RAW should push it back.

On receive:
-----------

Incoming, dev->hard_header != NULL
   mac_header -> ll header
   data       -> data

Outgoing, dev->hard_header != NULL
   mac_header -> ll header
   data       -> ll header

Incoming, dev->hard_header == NULL
   mac_header -> UNKNOWN position. It is very likely that it points to ll
		 header. PPP makes it so, which is wrong, because it introduces
		 asymmetry between rx and tx paths.
   data       -> data

Outgoing, dev->hard_header == NULL
   mac_header -> data. ll header is still not built!
   data       -> data

Resume
  If dev->hard_header == NULL we are unlikely to restore a sensible ll header.


On transmit:
------------

dev->hard_header != NULL
   mac_header -> ll header
   data       -> ll header

dev->hard_header == NULL (ll header is added by device, we cannot control it)
   mac_header -> data
   data       -> data

   We should set nh.raw on output to the correct position,
   the packet classifier depends on it.
 */
137
138/* List of all packet sockets. */
139static HLIST_HEAD(packet_sklist);
140static DEFINE_RWLOCK(packet_sklist_lock);
141
Linus Torvalds1da177e2005-04-16 15:20:36 -0700142/* Private packet socket structures. */
143
Linus Torvalds1da177e2005-04-16 15:20:36 -0700144struct packet_mclist
145{
146 struct packet_mclist *next;
147 int ifindex;
148 int count;
149 unsigned short type;
150 unsigned short alen;
Eric W. Biederman0fb375f2005-09-21 00:11:37 -0700151 unsigned char addr[MAX_ADDR_LEN];
152};
153/* identical to struct packet_mreq except it has
154 * a longer address field.
155 */
156struct packet_mreq_max
157{
158 int mr_ifindex;
159 unsigned short mr_type;
160 unsigned short mr_alen;
161 unsigned char mr_address[MAX_ADDR_LEN];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700162};
David S. Millera2efcfa2007-05-29 13:12:50 -0700163
Linus Torvalds1da177e2005-04-16 15:20:36 -0700164#ifdef CONFIG_PACKET_MMAP
165static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing);
166#endif
167
168static void packet_flush_mclist(struct sock *sk);
169
170struct packet_sock {
171 /* struct sock has to be the first member of packet_sock */
172 struct sock sk;
173 struct tpacket_stats stats;
174#ifdef CONFIG_PACKET_MMAP
175 char * *pg_vec;
176 unsigned int head;
177 unsigned int frames_per_block;
178 unsigned int frame_size;
179 unsigned int frame_max;
180 int copy_thresh;
181#endif
182 struct packet_type prot_hook;
183 spinlock_t bind_lock;
Herbert Xu8dc41942007-02-04 23:31:32 -0800184 unsigned int running:1, /* prot_hook is attached*/
Peter P. Waskiewicz Jr80feaac2007-04-20 16:05:39 -0700185 auxdata:1,
186 origdev:1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700187 int ifindex; /* bound device */
Al Viro0e11c912006-11-08 00:26:29 -0800188 __be16 num;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700189 struct packet_mclist *mclist;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700190#ifdef CONFIG_PACKET_MMAP
191 atomic_t mapped;
192 unsigned int pg_vec_order;
193 unsigned int pg_vec_pages;
194 unsigned int pg_vec_len;
195#endif
196};
197
Herbert Xuffbc6112007-02-04 23:33:10 -0800198struct packet_skb_cb {
199 unsigned int origlen;
200 union {
201 struct sockaddr_pkt pkt;
202 struct sockaddr_ll ll;
203 } sa;
204};
205
206#define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb))
Herbert Xu8dc41942007-02-04 23:31:32 -0800207
Linus Torvalds1da177e2005-04-16 15:20:36 -0700208#ifdef CONFIG_PACKET_MMAP
209
Jason Lunzad930652007-02-20 23:19:54 -0800210static inline struct tpacket_hdr *packet_lookup_frame(struct packet_sock *po, unsigned int position)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700211{
212 unsigned int pg_vec_pos, frame_offset;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700213
214 pg_vec_pos = position / po->frames_per_block;
215 frame_offset = position % po->frames_per_block;
216
Jason Lunzad930652007-02-20 23:19:54 -0800217 return (struct tpacket_hdr *)(po->pg_vec[pg_vec_pos] + (frame_offset * po->frame_size));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700218}
219#endif
220
221static inline struct packet_sock *pkt_sk(struct sock *sk)
222{
223 return (struct packet_sock *)sk;
224}
225
226static void packet_sock_destruct(struct sock *sk)
227{
228 BUG_TRAP(!atomic_read(&sk->sk_rmem_alloc));
229 BUG_TRAP(!atomic_read(&sk->sk_wmem_alloc));
230
231 if (!sock_flag(sk, SOCK_DEAD)) {
232 printk("Attempt to release alive packet socket: %p\n", sk);
233 return;
234 }
235
Pavel Emelyanov17ab56a2007-11-10 21:38:48 -0800236 sk_refcnt_debug_dec(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700237}
238
239
Eric Dumazet90ddc4f2005-12-22 12:49:22 -0800240static const struct proto_ops packet_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700241
Eric Dumazet90ddc4f2005-12-22 12:49:22 -0800242static const struct proto_ops packet_ops_spkt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700243
David S. Millerf2ccd8f2005-08-09 19:34:12 -0700244static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700245{
246 struct sock *sk;
247 struct sockaddr_pkt *spkt;
248
Eric W. Biedermane730c152007-09-17 11:53:39 -0700249 if (dev->nd_net != &init_net)
250 goto out;
251
Linus Torvalds1da177e2005-04-16 15:20:36 -0700252 /*
253 * When we registered the protocol we saved the socket in the data
254 * field for just this event.
255 */
256
257 sk = pt->af_packet_priv;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900258
Linus Torvalds1da177e2005-04-16 15:20:36 -0700259 /*
260 * Yank back the headers [hope the device set this
261 * right or kerboom...]
262 *
263 * Incoming packets have ll header pulled,
264 * push it back.
265 *
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -0700266 * For outgoing ones skb->data == skb_mac_header(skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700267 * so that this procedure is noop.
268 */
269
270 if (skb->pkt_type == PACKET_LOOPBACK)
271 goto out;
272
273 if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL)
274 goto oom;
275
276 /* drop any routing info */
277 dst_release(skb->dst);
278 skb->dst = NULL;
279
Phil Oester84531c22005-07-12 11:57:52 -0700280 /* drop conntrack reference */
281 nf_reset(skb);
282
Herbert Xuffbc6112007-02-04 23:33:10 -0800283 spkt = &PACKET_SKB_CB(skb)->sa.pkt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700284
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -0700285 skb_push(skb, skb->data - skb_mac_header(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700286
287 /*
288 * The SOCK_PACKET socket receives _all_ frames.
289 */
290
291 spkt->spkt_family = dev->type;
292 strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
293 spkt->spkt_protocol = skb->protocol;
294
295 /*
296 * Charge the memory to the socket. This is done specifically
297 * to prevent sockets using all the memory up.
298 */
299
300 if (sock_queue_rcv_skb(sk,skb) == 0)
301 return 0;
302
303out:
304 kfree_skb(skb);
305oom:
306 return 0;
307}
308
309
310/*
311 * Output a raw packet to a device layer. This bypasses all the other
312 * protocol layers and you must therefore supply it with a complete frame
313 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900314
Linus Torvalds1da177e2005-04-16 15:20:36 -0700315static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
316 struct msghdr *msg, size_t len)
317{
318 struct sock *sk = sock->sk;
319 struct sockaddr_pkt *saddr=(struct sockaddr_pkt *)msg->msg_name;
320 struct sk_buff *skb;
321 struct net_device *dev;
Al Viro0e11c912006-11-08 00:26:29 -0800322 __be16 proto=0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700323 int err;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900324
Linus Torvalds1da177e2005-04-16 15:20:36 -0700325 /*
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900326 * Get and verify the address.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700327 */
328
329 if (saddr)
330 {
331 if (msg->msg_namelen < sizeof(struct sockaddr))
332 return(-EINVAL);
333 if (msg->msg_namelen==sizeof(struct sockaddr_pkt))
334 proto=saddr->spkt_protocol;
335 }
336 else
337 return(-ENOTCONN); /* SOCK_PACKET must be sent giving an address */
338
339 /*
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900340 * Find the device first to size check it
Linus Torvalds1da177e2005-04-16 15:20:36 -0700341 */
342
343 saddr->spkt_device[13] = 0;
Eric W. Biederman881d9662007-09-17 11:56:21 -0700344 dev = dev_get_by_name(&init_net, saddr->spkt_device);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700345 err = -ENODEV;
346 if (dev == NULL)
347 goto out_unlock;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900348
David S. Millerd5e76b02007-01-25 19:30:36 -0800349 err = -ENETDOWN;
350 if (!(dev->flags & IFF_UP))
351 goto out_unlock;
352
Linus Torvalds1da177e2005-04-16 15:20:36 -0700353 /*
354 * You may not queue a frame bigger than the mtu. This is the lowest level
355 * raw protocol and you must do your own fragmentation at this level.
356 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900357
Linus Torvalds1da177e2005-04-16 15:20:36 -0700358 err = -EMSGSIZE;
Kris Katterjohn8ae55f02006-01-23 16:28:02 -0800359 if (len > dev->mtu + dev->hard_header_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700360 goto out_unlock;
361
362 err = -ENOBUFS;
363 skb = sock_wmalloc(sk, len + LL_RESERVED_SPACE(dev), 0, GFP_KERNEL);
364
365 /*
366 * If the write buffer is full, then tough. At this level the user gets to
367 * deal with the problem - do your own algorithmic backoffs. That's far
368 * more flexible.
369 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900370
371 if (skb == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700372 goto out_unlock;
373
374 /*
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900375 * Fill it in
Linus Torvalds1da177e2005-04-16 15:20:36 -0700376 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900377
Linus Torvalds1da177e2005-04-16 15:20:36 -0700378 /* FIXME: Save some space for broken drivers that write a
379 * hard header at transmission time by themselves. PPP is the
380 * notable one here. This should really be fixed at the driver level.
381 */
382 skb_reserve(skb, LL_RESERVED_SPACE(dev));
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -0700383 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700384
385 /* Try to align data part correctly */
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -0700386 if (dev->header_ops) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700387 skb->data -= dev->hard_header_len;
388 skb->tail -= dev->hard_header_len;
389 if (len < dev->hard_header_len)
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -0700390 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700391 }
392
393 /* Returns -EFAULT on error */
394 err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
395 skb->protocol = proto;
396 skb->dev = dev;
397 skb->priority = sk->sk_priority;
398 if (err)
399 goto out_free;
400
Linus Torvalds1da177e2005-04-16 15:20:36 -0700401 /*
402 * Now send it
403 */
404
405 dev_queue_xmit(skb);
406 dev_put(dev);
407 return(len);
408
409out_free:
410 kfree_skb(skb);
411out_unlock:
412 if (dev)
413 dev_put(dev);
414 return err;
415}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700416
David S. Millerdbcb5852007-01-24 15:21:02 -0800417static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
418 unsigned int res)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700419{
420 struct sk_filter *filter;
421
Dmitry Mishinfda9ef52006-08-31 15:28:39 -0700422 rcu_read_lock_bh();
423 filter = rcu_dereference(sk->sk_filter);
David S. Millerdbcb5852007-01-24 15:21:02 -0800424 if (filter != NULL)
425 res = sk_run_filter(skb, filter->insns, filter->len);
Dmitry Mishinfda9ef52006-08-31 15:28:39 -0700426 rcu_read_unlock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700427
David S. Millerdbcb5852007-01-24 15:21:02 -0800428 return res;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700429}
430
431/*
432 This function makes lazy skb cloning in hope that most of packets
433 are discarded by BPF.
434
435 Note tricky part: we DO mangle shared skb! skb->data, skb->len
436 and skb->cb are mangled. It works because (and until) packets
437 falling here are owned by current CPU. Output packets are cloned
438 by dev_queue_xmit_nit(), input packets are processed by net_bh
439 sequencially, so that if we return skb to original state on exit,
440 we will not harm anyone.
441 */
442
David S. Millerf2ccd8f2005-08-09 19:34:12 -0700443static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700444{
445 struct sock *sk;
446 struct sockaddr_ll *sll;
447 struct packet_sock *po;
448 u8 * skb_head = skb->data;
449 int skb_len = skb->len;
David S. Millerdbcb5852007-01-24 15:21:02 -0800450 unsigned int snaplen, res;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700451
Eric W. Biedermane730c152007-09-17 11:53:39 -0700452 if (dev->nd_net != &init_net)
453 goto drop;
454
Linus Torvalds1da177e2005-04-16 15:20:36 -0700455 if (skb->pkt_type == PACKET_LOOPBACK)
456 goto drop;
457
458 sk = pt->af_packet_priv;
459 po = pkt_sk(sk);
460
461 skb->dev = dev;
462
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -0700463 if (dev->header_ops) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700464 /* The device has an explicit notion of ll header,
465 exported to higher levels.
466
467 Otherwise, the device hides datails of it frame
468 structure, so that corresponding packet head
469 never delivered to user.
470 */
471 if (sk->sk_type != SOCK_DGRAM)
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -0700472 skb_push(skb, skb->data - skb_mac_header(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700473 else if (skb->pkt_type == PACKET_OUTGOING) {
474 /* Special case: outgoing packets have ll header at head */
Arnaldo Carvalho de Melobbe735e2007-03-10 22:16:10 -0300475 skb_pull(skb, skb_network_offset(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700476 }
477 }
478
479 snaplen = skb->len;
480
David S. Millerdbcb5852007-01-24 15:21:02 -0800481 res = run_filter(skb, sk, snaplen);
482 if (!res)
Dmitry Mishinfda9ef52006-08-31 15:28:39 -0700483 goto drop_n_restore;
David S. Millerdbcb5852007-01-24 15:21:02 -0800484 if (snaplen > res)
485 snaplen = res;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700486
487 if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
488 (unsigned)sk->sk_rcvbuf)
489 goto drop_n_acct;
490
491 if (skb_shared(skb)) {
492 struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
493 if (nskb == NULL)
494 goto drop_n_acct;
495
496 if (skb_head != skb->data) {
497 skb->data = skb_head;
498 skb->len = skb_len;
499 }
500 kfree_skb(skb);
501 skb = nskb;
502 }
503
Herbert Xuffbc6112007-02-04 23:33:10 -0800504 BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
505 sizeof(skb->cb));
506
507 sll = &PACKET_SKB_CB(skb)->sa.ll;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700508 sll->sll_family = AF_PACKET;
509 sll->sll_hatype = dev->type;
510 sll->sll_protocol = skb->protocol;
511 sll->sll_pkttype = skb->pkt_type;
Peter P Waskiewicz Jr8032b462007-11-10 22:03:25 -0800512 if (unlikely(po->origdev))
Peter P. Waskiewicz Jr80feaac2007-04-20 16:05:39 -0700513 sll->sll_ifindex = orig_dev->ifindex;
514 else
515 sll->sll_ifindex = dev->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700516
Stephen Hemmingerb95cce32007-09-26 22:13:38 -0700517 sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700518
Herbert Xuffbc6112007-02-04 23:33:10 -0800519 PACKET_SKB_CB(skb)->origlen = skb->len;
Herbert Xu8dc41942007-02-04 23:31:32 -0800520
Linus Torvalds1da177e2005-04-16 15:20:36 -0700521 if (pskb_trim(skb, snaplen))
522 goto drop_n_acct;
523
524 skb_set_owner_r(skb, sk);
525 skb->dev = NULL;
526 dst_release(skb->dst);
527 skb->dst = NULL;
528
Phil Oester84531c22005-07-12 11:57:52 -0700529 /* drop conntrack reference */
530 nf_reset(skb);
531
Linus Torvalds1da177e2005-04-16 15:20:36 -0700532 spin_lock(&sk->sk_receive_queue.lock);
533 po->stats.tp_packets++;
534 __skb_queue_tail(&sk->sk_receive_queue, skb);
535 spin_unlock(&sk->sk_receive_queue.lock);
536 sk->sk_data_ready(sk, skb->len);
537 return 0;
538
539drop_n_acct:
540 spin_lock(&sk->sk_receive_queue.lock);
541 po->stats.tp_drops++;
542 spin_unlock(&sk->sk_receive_queue.lock);
543
544drop_n_restore:
545 if (skb_head != skb->data && skb_shared(skb)) {
546 skb->data = skb_head;
547 skb->len = skb_len;
548 }
549drop:
550 kfree_skb(skb);
551 return 0;
552}
553
554#ifdef CONFIG_PACKET_MMAP
David S. Millerf2ccd8f2005-08-09 19:34:12 -0700555static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700556{
557 struct sock *sk;
558 struct packet_sock *po;
559 struct sockaddr_ll *sll;
560 struct tpacket_hdr *h;
561 u8 * skb_head = skb->data;
562 int skb_len = skb->len;
David S. Millerdbcb5852007-01-24 15:21:02 -0800563 unsigned int snaplen, res;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700564 unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
565 unsigned short macoff, netoff;
566 struct sk_buff *copy_skb = NULL;
Eric Dumazetb7aa0bf2007-04-19 16:16:32 -0700567 struct timeval tv;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700568
Eric W. Biedermane730c152007-09-17 11:53:39 -0700569 if (dev->nd_net != &init_net)
570 goto drop;
571
Linus Torvalds1da177e2005-04-16 15:20:36 -0700572 if (skb->pkt_type == PACKET_LOOPBACK)
573 goto drop;
574
575 sk = pt->af_packet_priv;
576 po = pkt_sk(sk);
577
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -0700578 if (dev->header_ops) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700579 if (sk->sk_type != SOCK_DGRAM)
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -0700580 skb_push(skb, skb->data - skb_mac_header(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700581 else if (skb->pkt_type == PACKET_OUTGOING) {
582 /* Special case: outgoing packets have ll header at head */
Arnaldo Carvalho de Melobbe735e2007-03-10 22:16:10 -0300583 skb_pull(skb, skb_network_offset(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700584 }
585 }
586
Herbert Xu8dc41942007-02-04 23:31:32 -0800587 if (skb->ip_summed == CHECKSUM_PARTIAL)
588 status |= TP_STATUS_CSUMNOTREADY;
589
Linus Torvalds1da177e2005-04-16 15:20:36 -0700590 snaplen = skb->len;
591
David S. Millerdbcb5852007-01-24 15:21:02 -0800592 res = run_filter(skb, sk, snaplen);
593 if (!res)
Dmitry Mishinfda9ef52006-08-31 15:28:39 -0700594 goto drop_n_restore;
David S. Millerdbcb5852007-01-24 15:21:02 -0800595 if (snaplen > res)
596 snaplen = res;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700597
598 if (sk->sk_type == SOCK_DGRAM) {
599 macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16;
600 } else {
Arnaldo Carvalho de Melobbe735e2007-03-10 22:16:10 -0300601 unsigned maclen = skb_network_offset(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700602 netoff = TPACKET_ALIGN(TPACKET_HDRLEN + (maclen < 16 ? 16 : maclen));
603 macoff = netoff - maclen;
604 }
605
606 if (macoff + snaplen > po->frame_size) {
607 if (po->copy_thresh &&
608 atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
609 (unsigned)sk->sk_rcvbuf) {
610 if (skb_shared(skb)) {
611 copy_skb = skb_clone(skb, GFP_ATOMIC);
612 } else {
613 copy_skb = skb_get(skb);
614 skb_head = skb->data;
615 }
616 if (copy_skb)
617 skb_set_owner_r(copy_skb, sk);
618 }
619 snaplen = po->frame_size - macoff;
620 if ((int)snaplen < 0)
621 snaplen = 0;
622 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700623
624 spin_lock(&sk->sk_receive_queue.lock);
Jason Lunzad930652007-02-20 23:19:54 -0800625 h = packet_lookup_frame(po, po->head);
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900626
Linus Torvalds1da177e2005-04-16 15:20:36 -0700627 if (h->tp_status)
628 goto ring_is_full;
629 po->head = po->head != po->frame_max ? po->head+1 : 0;
630 po->stats.tp_packets++;
631 if (copy_skb) {
632 status |= TP_STATUS_COPY;
633 __skb_queue_tail(&sk->sk_receive_queue, copy_skb);
634 }
635 if (!po->stats.tp_drops)
636 status &= ~TP_STATUS_LOSING;
637 spin_unlock(&sk->sk_receive_queue.lock);
638
Patrick McHardycbe21d82006-09-17 23:59:57 -0700639 skb_copy_bits(skb, 0, (u8*)h + macoff, snaplen);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700640
641 h->tp_len = skb->len;
642 h->tp_snaplen = snaplen;
643 h->tp_mac = macoff;
644 h->tp_net = netoff;
Stephen Hemminger50f17782007-09-06 13:55:02 +0100645 if (skb->tstamp.tv64)
646 tv = ktime_to_timeval(skb->tstamp);
647 else
648 do_gettimeofday(&tv);
Eric Dumazetb7aa0bf2007-04-19 16:16:32 -0700649 h->tp_sec = tv.tv_sec;
650 h->tp_usec = tv.tv_usec;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700651
652 sll = (struct sockaddr_ll*)((u8*)h + TPACKET_ALIGN(sizeof(*h)));
Stephen Hemmingerb95cce32007-09-26 22:13:38 -0700653 sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700654 sll->sll_family = AF_PACKET;
655 sll->sll_hatype = dev->type;
656 sll->sll_protocol = skb->protocol;
657 sll->sll_pkttype = skb->pkt_type;
Peter P Waskiewicz Jr8032b462007-11-10 22:03:25 -0800658 if (unlikely(po->origdev))
Peter P. Waskiewicz Jr80feaac2007-04-20 16:05:39 -0700659 sll->sll_ifindex = orig_dev->ifindex;
660 else
661 sll->sll_ifindex = dev->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700662
663 h->tp_status = status;
Ralf Baechlee16aa202006-12-07 00:11:33 -0800664 smp_mb();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700665
666 {
667 struct page *p_start, *p_end;
668 u8 *h_end = (u8 *)h + macoff + snaplen - 1;
669
670 p_start = virt_to_page(h);
671 p_end = virt_to_page(h_end);
672 while (p_start <= p_end) {
673 flush_dcache_page(p_start);
674 p_start++;
675 }
676 }
677
678 sk->sk_data_ready(sk, 0);
679
680drop_n_restore:
681 if (skb_head != skb->data && skb_shared(skb)) {
682 skb->data = skb_head;
683 skb->len = skb_len;
684 }
685drop:
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900686 kfree_skb(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700687 return 0;
688
689ring_is_full:
690 po->stats.tp_drops++;
691 spin_unlock(&sk->sk_receive_queue.lock);
692
693 sk->sk_data_ready(sk, 0);
694 if (copy_skb)
695 kfree_skb(copy_skb);
696 goto drop_n_restore;
697}
698
699#endif
700
701
702static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
703 struct msghdr *msg, size_t len)
704{
705 struct sock *sk = sock->sk;
706 struct sockaddr_ll *saddr=(struct sockaddr_ll *)msg->msg_name;
707 struct sk_buff *skb;
708 struct net_device *dev;
Al Viro0e11c912006-11-08 00:26:29 -0800709 __be16 proto;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700710 unsigned char *addr;
711 int ifindex, err, reserve = 0;
712
713 /*
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900714 * Get and verify the address.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700715 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900716
Linus Torvalds1da177e2005-04-16 15:20:36 -0700717 if (saddr == NULL) {
718 struct packet_sock *po = pkt_sk(sk);
719
720 ifindex = po->ifindex;
721 proto = po->num;
722 addr = NULL;
723 } else {
724 err = -EINVAL;
725 if (msg->msg_namelen < sizeof(struct sockaddr_ll))
726 goto out;
Eric W. Biederman0fb375f2005-09-21 00:11:37 -0700727 if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
728 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700729 ifindex = saddr->sll_ifindex;
730 proto = saddr->sll_protocol;
731 addr = saddr->sll_addr;
732 }
733
734
Eric W. Biederman881d9662007-09-17 11:56:21 -0700735 dev = dev_get_by_index(&init_net, ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700736 err = -ENXIO;
737 if (dev == NULL)
738 goto out_unlock;
739 if (sock->type == SOCK_RAW)
740 reserve = dev->hard_header_len;
741
David S. Millerd5e76b02007-01-25 19:30:36 -0800742 err = -ENETDOWN;
743 if (!(dev->flags & IFF_UP))
744 goto out_unlock;
745
Linus Torvalds1da177e2005-04-16 15:20:36 -0700746 err = -EMSGSIZE;
747 if (len > dev->mtu+reserve)
748 goto out_unlock;
749
750 skb = sock_alloc_send_skb(sk, len + LL_RESERVED_SPACE(dev),
751 msg->msg_flags & MSG_DONTWAIT, &err);
752 if (skb==NULL)
753 goto out_unlock;
754
755 skb_reserve(skb, LL_RESERVED_SPACE(dev));
Arnaldo Carvalho de Meloc1d2bbe2007-04-10 20:45:18 -0700756 skb_reset_network_header(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700757
Stephen Hemminger0c4e8582007-10-09 01:36:32 -0700758 err = -EINVAL;
759 if (sock->type == SOCK_DGRAM &&
760 dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len) < 0)
761 goto out_free;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700762
763 /* Returns -EFAULT on error */
764 err = memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len);
765 if (err)
766 goto out_free;
767
768 skb->protocol = proto;
769 skb->dev = dev;
770 skb->priority = sk->sk_priority;
771
Linus Torvalds1da177e2005-04-16 15:20:36 -0700772 /*
773 * Now send it
774 */
775
776 err = dev_queue_xmit(skb);
777 if (err > 0 && (err = net_xmit_errno(err)) != 0)
778 goto out_unlock;
779
780 dev_put(dev);
781
782 return(len);
783
784out_free:
785 kfree_skb(skb);
786out_unlock:
787 if (dev)
788 dev_put(dev);
789out:
790 return err;
791}
792
793/*
794 * Close a PACKET socket. This is fairly simple. We immediately go
795 * to 'closed' state and remove our protocol entry in the device list.
796 */
797
798static int packet_release(struct socket *sock)
799{
800 struct sock *sk = sock->sk;
801 struct packet_sock *po;
802
803 if (!sk)
804 return 0;
805
806 po = pkt_sk(sk);
807
808 write_lock_bh(&packet_sklist_lock);
809 sk_del_node_init(sk);
810 write_unlock_bh(&packet_sklist_lock);
811
812 /*
813 * Unhook packet receive handler.
814 */
815
816 if (po->running) {
817 /*
818 * Remove the protocol hook
819 */
820 dev_remove_pack(&po->prot_hook);
821 po->running = 0;
822 po->num = 0;
823 __sock_put(sk);
824 }
825
Linus Torvalds1da177e2005-04-16 15:20:36 -0700826 packet_flush_mclist(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700827
828#ifdef CONFIG_PACKET_MMAP
829 if (po->pg_vec) {
830 struct tpacket_req req;
831 memset(&req, 0, sizeof(req));
832 packet_set_ring(sk, &req, 1);
833 }
834#endif
835
836 /*
837 * Now the socket is dead. No more input will appear.
838 */
839
840 sock_orphan(sk);
841 sock->sk = NULL;
842
843 /* Purge queues */
844
845 skb_queue_purge(&sk->sk_receive_queue);
Pavel Emelyanov17ab56a2007-11-10 21:38:48 -0800846 sk_refcnt_debug_release(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700847
848 sock_put(sk);
849 return 0;
850}
851
852/*
853 * Attach a packet hook.
854 */
855
Al Viro0e11c912006-11-08 00:26:29 -0800856static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700857{
858 struct packet_sock *po = pkt_sk(sk);
859 /*
860 * Detach an existing hook if present.
861 */
862
863 lock_sock(sk);
864
865 spin_lock(&po->bind_lock);
866 if (po->running) {
867 __sock_put(sk);
868 po->running = 0;
869 po->num = 0;
870 spin_unlock(&po->bind_lock);
871 dev_remove_pack(&po->prot_hook);
872 spin_lock(&po->bind_lock);
873 }
874
875 po->num = protocol;
876 po->prot_hook.type = protocol;
877 po->prot_hook.dev = dev;
878
879 po->ifindex = dev ? dev->ifindex : 0;
880
881 if (protocol == 0)
882 goto out_unlock;
883
884 if (dev) {
885 if (dev->flags&IFF_UP) {
886 dev_add_pack(&po->prot_hook);
887 sock_hold(sk);
888 po->running = 1;
889 } else {
890 sk->sk_err = ENETDOWN;
891 if (!sock_flag(sk, SOCK_DEAD))
892 sk->sk_error_report(sk);
893 }
894 } else {
895 dev_add_pack(&po->prot_hook);
896 sock_hold(sk);
897 po->running = 1;
898 }
899
900out_unlock:
901 spin_unlock(&po->bind_lock);
902 release_sock(sk);
903 return 0;
904}
905
906/*
907 * Bind a packet socket to a device
908 */
909
Linus Torvalds1da177e2005-04-16 15:20:36 -0700910static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr, int addr_len)
911{
912 struct sock *sk=sock->sk;
913 char name[15];
914 struct net_device *dev;
915 int err = -ENODEV;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900916
Linus Torvalds1da177e2005-04-16 15:20:36 -0700917 /*
918 * Check legality
919 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900920
Kris Katterjohn8ae55f02006-01-23 16:28:02 -0800921 if (addr_len != sizeof(struct sockaddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -0700922 return -EINVAL;
923 strlcpy(name,uaddr->sa_data,sizeof(name));
924
Eric W. Biederman881d9662007-09-17 11:56:21 -0700925 dev = dev_get_by_name(&init_net, name);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700926 if (dev) {
927 err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
928 dev_put(dev);
929 }
930 return err;
931}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700932
933static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
934{
935 struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;
936 struct sock *sk=sock->sk;
937 struct net_device *dev = NULL;
938 int err;
939
940
941 /*
942 * Check legality
943 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900944
Linus Torvalds1da177e2005-04-16 15:20:36 -0700945 if (addr_len < sizeof(struct sockaddr_ll))
946 return -EINVAL;
947 if (sll->sll_family != AF_PACKET)
948 return -EINVAL;
949
950 if (sll->sll_ifindex) {
951 err = -ENODEV;
Eric W. Biederman881d9662007-09-17 11:56:21 -0700952 dev = dev_get_by_index(&init_net, sll->sll_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700953 if (dev == NULL)
954 goto out;
955 }
956 err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
957 if (dev)
958 dev_put(dev);
959
960out:
961 return err;
962}
963
/* Protocol descriptor: sizes sk_alloc() for a struct packet_sock. */
static struct proto packet_proto = {
	.name	  = "PACKET",
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct packet_sock),
};
969
970/*
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900971 * Create a packet of type SOCK_PACKET.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700972 */
973
Eric W. Biederman1b8d7ae2007-10-08 23:24:22 -0700974static int packet_create(struct net *net, struct socket *sock, int protocol)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700975{
976 struct sock *sk;
977 struct packet_sock *po;
Al Viro0e11c912006-11-08 00:26:29 -0800978 __be16 proto = (__force __be16)protocol; /* weird, but documented */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700979 int err;
980
Eric W. Biederman1b8d7ae2007-10-08 23:24:22 -0700981 if (net != &init_net)
982 return -EAFNOSUPPORT;
983
Linus Torvalds1da177e2005-04-16 15:20:36 -0700984 if (!capable(CAP_NET_RAW))
985 return -EPERM;
David S. Millerbe020972007-05-29 13:16:31 -0700986 if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
987 sock->type != SOCK_PACKET)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700988 return -ESOCKTNOSUPPORT;
989
990 sock->state = SS_UNCONNECTED;
991
992 err = -ENOBUFS;
Pavel Emelyanov6257ff22007-11-01 00:39:31 -0700993 sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700994 if (sk == NULL)
995 goto out;
996
997 sock->ops = &packet_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700998 if (sock->type == SOCK_PACKET)
999 sock->ops = &packet_ops_spkt;
David S. Millerbe020972007-05-29 13:16:31 -07001000
Linus Torvalds1da177e2005-04-16 15:20:36 -07001001 sock_init_data(sock, sk);
1002
1003 po = pkt_sk(sk);
1004 sk->sk_family = PF_PACKET;
Al Viro0e11c912006-11-08 00:26:29 -08001005 po->num = proto;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001006
1007 sk->sk_destruct = packet_sock_destruct;
Pavel Emelyanov17ab56a2007-11-10 21:38:48 -08001008 sk_refcnt_debug_inc(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001009
1010 /*
1011 * Attach a protocol block
1012 */
1013
1014 spin_lock_init(&po->bind_lock);
1015 po->prot_hook.func = packet_rcv;
David S. Millerbe020972007-05-29 13:16:31 -07001016
Linus Torvalds1da177e2005-04-16 15:20:36 -07001017 if (sock->type == SOCK_PACKET)
1018 po->prot_hook.func = packet_rcv_spkt;
David S. Millerbe020972007-05-29 13:16:31 -07001019
Linus Torvalds1da177e2005-04-16 15:20:36 -07001020 po->prot_hook.af_packet_priv = sk;
1021
Al Viro0e11c912006-11-08 00:26:29 -08001022 if (proto) {
1023 po->prot_hook.type = proto;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001024 dev_add_pack(&po->prot_hook);
1025 sock_hold(sk);
1026 po->running = 1;
1027 }
1028
1029 write_lock_bh(&packet_sklist_lock);
1030 sk_add_node(sk, &packet_sklist);
1031 write_unlock_bh(&packet_sklist_lock);
1032 return(0);
1033out:
1034 return err;
1035}
1036
1037/*
1038 * Pull a packet from our receive queue and hand it to the user.
1039 * If necessary we block.
1040 */
1041
1042static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
1043 struct msghdr *msg, size_t len, int flags)
1044{
1045 struct sock *sk = sock->sk;
1046 struct sk_buff *skb;
1047 int copied, err;
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001048 struct sockaddr_ll *sll;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001049
1050 err = -EINVAL;
1051 if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT))
1052 goto out;
1053
1054#if 0
1055 /* What error should we return now? EUNATTACH? */
1056 if (pkt_sk(sk)->ifindex < 0)
1057 return -ENODEV;
1058#endif
1059
1060 /*
Linus Torvalds1da177e2005-04-16 15:20:36 -07001061 * Call the generic datagram receiver. This handles all sorts
1062 * of horrible races and re-entrancy so we can forget about it
1063 * in the protocol layers.
1064 *
1065 * Now it will return ENETDOWN, if device have just gone down,
1066 * but then it will block.
1067 */
1068
1069 skb=skb_recv_datagram(sk,flags,flags&MSG_DONTWAIT,&err);
1070
1071 /*
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001072 * An error occurred so return it. Because skb_recv_datagram()
Linus Torvalds1da177e2005-04-16 15:20:36 -07001073 * handles the blocking we don't see and worry about blocking
1074 * retries.
1075 */
1076
Kris Katterjohn8ae55f02006-01-23 16:28:02 -08001077 if (skb == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001078 goto out;
1079
1080 /*
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001081 * If the address length field is there to be filled in, we fill
1082 * it in now.
1083 */
1084
Herbert Xuffbc6112007-02-04 23:33:10 -08001085 sll = &PACKET_SKB_CB(skb)->sa.ll;
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001086 if (sock->type == SOCK_PACKET)
1087 msg->msg_namelen = sizeof(struct sockaddr_pkt);
1088 else
1089 msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);
1090
1091 /*
Linus Torvalds1da177e2005-04-16 15:20:36 -07001092 * You lose any data beyond the buffer you gave. If it worries a
1093 * user program they can ask the device for its MTU anyway.
1094 */
1095
1096 copied = skb->len;
1097 if (copied > len)
1098 {
1099 copied=len;
1100 msg->msg_flags|=MSG_TRUNC;
1101 }
1102
1103 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
1104 if (err)
1105 goto out_free;
1106
1107 sock_recv_timestamp(msg, sk, skb);
1108
1109 if (msg->msg_name)
Herbert Xuffbc6112007-02-04 23:33:10 -08001110 memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
1111 msg->msg_namelen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001112
Herbert Xu8dc41942007-02-04 23:31:32 -08001113 if (pkt_sk(sk)->auxdata) {
Herbert Xuffbc6112007-02-04 23:33:10 -08001114 struct tpacket_auxdata aux;
1115
1116 aux.tp_status = TP_STATUS_USER;
1117 if (skb->ip_summed == CHECKSUM_PARTIAL)
1118 aux.tp_status |= TP_STATUS_CSUMNOTREADY;
1119 aux.tp_len = PACKET_SKB_CB(skb)->origlen;
1120 aux.tp_snaplen = skb->len;
1121 aux.tp_mac = 0;
Arnaldo Carvalho de Melobbe735e2007-03-10 22:16:10 -03001122 aux.tp_net = skb_network_offset(skb);
Herbert Xuffbc6112007-02-04 23:33:10 -08001123
1124 put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
Herbert Xu8dc41942007-02-04 23:31:32 -08001125 }
1126
Linus Torvalds1da177e2005-04-16 15:20:36 -07001127 /*
1128 * Free or return the buffer as appropriate. Again this
1129 * hides all the races and re-entrancy issues from us.
1130 */
1131 err = (flags&MSG_TRUNC) ? skb->len : copied;
1132
1133out_free:
1134 skb_free_datagram(sk, skb);
1135out:
1136 return err;
1137}
1138
Linus Torvalds1da177e2005-04-16 15:20:36 -07001139static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
1140 int *uaddr_len, int peer)
1141{
1142 struct net_device *dev;
1143 struct sock *sk = sock->sk;
1144
1145 if (peer)
1146 return -EOPNOTSUPP;
1147
1148 uaddr->sa_family = AF_PACKET;
Eric W. Biederman881d9662007-09-17 11:56:21 -07001149 dev = dev_get_by_index(&init_net, pkt_sk(sk)->ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001150 if (dev) {
1151 strlcpy(uaddr->sa_data, dev->name, 15);
1152 dev_put(dev);
1153 } else
1154 memset(uaddr->sa_data, 0, 14);
1155 *uaddr_len = sizeof(*uaddr);
1156
1157 return 0;
1158}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001159
/*
 *	Fill a sockaddr_ll describing this socket's binding: ifindex,
 *	protocol, and (if the bound device still exists) its hardware
 *	type and address.  *uaddr_len reflects the actual address length.
 */
static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
			  int *uaddr_len, int peer)
{
	struct net_device *dev;
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	struct sockaddr_ll *sll = (struct sockaddr_ll*)uaddr;

	/* Packet sockets have no peer. */
	if (peer)
		return -EOPNOTSUPP;

	sll->sll_family = AF_PACKET;
	sll->sll_ifindex = po->ifindex;
	sll->sll_protocol = po->num;
	dev = dev_get_by_index(&init_net, po->ifindex);
	if (dev) {
		sll->sll_hatype = dev->type;
		sll->sll_halen = dev->addr_len;
		memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
		dev_put(dev);
	} else {
		sll->sll_hatype = 0;	/* Bad: we have no ARPHRD_UNSPEC */
		sll->sll_halen = 0;
	}
	/* Report only the used portion of the (variable-length) address. */
	*uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;

	return 0;
}
1188
Linus Torvalds1da177e2005-04-16 15:20:36 -07001189static void packet_dev_mc(struct net_device *dev, struct packet_mclist *i, int what)
1190{
1191 switch (i->type) {
1192 case PACKET_MR_MULTICAST:
1193 if (what > 0)
1194 dev_mc_add(dev, i->addr, i->alen, 0);
1195 else
1196 dev_mc_delete(dev, i->addr, i->alen, 0);
1197 break;
1198 case PACKET_MR_PROMISC:
1199 dev_set_promiscuity(dev, what);
1200 break;
1201 case PACKET_MR_ALLMULTI:
1202 dev_set_allmulti(dev, what);
1203 break;
1204 default:;
1205 }
1206}
1207
1208static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
1209{
1210 for ( ; i; i=i->next) {
1211 if (i->ifindex == dev->ifindex)
1212 packet_dev_mc(dev, i, what);
1213 }
1214}
1215
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001216static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001217{
1218 struct packet_sock *po = pkt_sk(sk);
1219 struct packet_mclist *ml, *i;
1220 struct net_device *dev;
1221 int err;
1222
1223 rtnl_lock();
1224
1225 err = -ENODEV;
Eric W. Biederman881d9662007-09-17 11:56:21 -07001226 dev = __dev_get_by_index(&init_net, mreq->mr_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001227 if (!dev)
1228 goto done;
1229
1230 err = -EINVAL;
1231 if (mreq->mr_alen > dev->addr_len)
1232 goto done;
1233
1234 err = -ENOBUFS;
Kris Katterjohn8b3a7002006-01-11 15:56:43 -08001235 i = kmalloc(sizeof(*i), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001236 if (i == NULL)
1237 goto done;
1238
1239 err = 0;
1240 for (ml = po->mclist; ml; ml = ml->next) {
1241 if (ml->ifindex == mreq->mr_ifindex &&
1242 ml->type == mreq->mr_type &&
1243 ml->alen == mreq->mr_alen &&
1244 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1245 ml->count++;
1246 /* Free the new element ... */
1247 kfree(i);
1248 goto done;
1249 }
1250 }
1251
1252 i->type = mreq->mr_type;
1253 i->ifindex = mreq->mr_ifindex;
1254 i->alen = mreq->mr_alen;
1255 memcpy(i->addr, mreq->mr_address, i->alen);
1256 i->count = 1;
1257 i->next = po->mclist;
1258 po->mclist = i;
1259 packet_dev_mc(dev, i, +1);
1260
1261done:
1262 rtnl_unlock();
1263 return err;
1264}
1265
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001266static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001267{
1268 struct packet_mclist *ml, **mlp;
1269
1270 rtnl_lock();
1271
1272 for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
1273 if (ml->ifindex == mreq->mr_ifindex &&
1274 ml->type == mreq->mr_type &&
1275 ml->alen == mreq->mr_alen &&
1276 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1277 if (--ml->count == 0) {
1278 struct net_device *dev;
1279 *mlp = ml->next;
Eric W. Biederman881d9662007-09-17 11:56:21 -07001280 dev = dev_get_by_index(&init_net, ml->ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001281 if (dev) {
1282 packet_dev_mc(dev, ml, -1);
1283 dev_put(dev);
1284 }
1285 kfree(ml);
1286 }
1287 rtnl_unlock();
1288 return 0;
1289 }
1290 }
1291 rtnl_unlock();
1292 return -EADDRNOTAVAIL;
1293}
1294
/*
 *	Drop every membership the socket holds, withdrawing each from
 *	its device (when still present).  Called at socket release.
 */
static void packet_flush_mclist(struct sock *sk)
{
	struct packet_sock *po = pkt_sk(sk);
	struct packet_mclist *ml;

	/* Fast path: avoid taking RTNL when there is nothing to flush. */
	if (!po->mclist)
		return;

	rtnl_lock();
	while ((ml = po->mclist) != NULL) {
		struct net_device *dev;

		po->mclist = ml->next;
		if ((dev = dev_get_by_index(&init_net, ml->ifindex)) != NULL) {
			packet_dev_mc(dev, ml, -1);
			dev_put(dev);
		}
		kfree(ml);
	}
	rtnl_unlock();
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001316
/*
 *	SOL_PACKET setsockopt: membership add/drop, RX ring setup and
 *	copy threshold (mmap builds only), auxdata and origdev flags.
 */
static int
packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, int optlen)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	int ret;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	switch(optname) {
	case PACKET_ADD_MEMBERSHIP:
	case PACKET_DROP_MEMBERSHIP:
	{
		struct packet_mreq_max mreq;
		int len = optlen;
		memset(&mreq, 0, sizeof(mreq));
		/* Accept old (short) and new (max) mreq layouts. */
		if (len < sizeof(struct packet_mreq))
			return -EINVAL;
		if (len > sizeof(mreq))
			len = sizeof(mreq);
		if (copy_from_user(&mreq,optval,len))
			return -EFAULT;
		/* The claimed address length must fit what was copied in. */
		if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
			return -EINVAL;
		if (optname == PACKET_ADD_MEMBERSHIP)
			ret = packet_mc_add(sk, &mreq);
		else
			ret = packet_mc_drop(sk, &mreq);
		return ret;
	}

#ifdef CONFIG_PACKET_MMAP
	case PACKET_RX_RING:
	{
		struct tpacket_req req;

		if (optlen<sizeof(req))
			return -EINVAL;
		if (copy_from_user(&req,optval,sizeof(req)))
			return -EFAULT;
		/* closing == 0: this is a (re)configure, not a teardown. */
		return packet_set_ring(sk, &req, 0);
	}
	case PACKET_COPY_THRESH:
	{
		int val;

		if (optlen!=sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val,optval,sizeof(val)))
			return -EFAULT;

		pkt_sk(sk)->copy_thresh = val;
		return 0;
	}
#endif
	case PACKET_AUXDATA:
	{
		int val;

		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->auxdata = !!val;
		return 0;
	}
	case PACKET_ORIGDEV:
	{
		int val;

		if (optlen < sizeof(val))
			return -EINVAL;
		if (copy_from_user(&val, optval, sizeof(val)))
			return -EFAULT;

		po->origdev = !!val;
		return 0;
	}
	default:
		return -ENOPROTOOPT;
	}
}
1401
/*
 *	SOL_PACKET getsockopt: statistics (read-and-reset under the
 *	receive queue lock), auxdata flag, origdev flag.  Copies at most
 *	min(user len, option size) bytes back to userspace.
 */
static int packet_getsockopt(struct socket *sock, int level, int optname,
			     char __user *optval, int __user *optlen)
{
	int len;
	int val;
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	void *data;
	struct tpacket_stats st;

	if (level != SOL_PACKET)
		return -ENOPROTOOPT;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < 0)
		return -EINVAL;

	switch(optname) {
	case PACKET_STATISTICS:
		if (len > sizeof(struct tpacket_stats))
			len = sizeof(struct tpacket_stats);
		/* Snapshot and zero the counters atomically w.r.t. receive. */
		spin_lock_bh(&sk->sk_receive_queue.lock);
		st = po->stats;
		memset(&po->stats, 0, sizeof(st));
		spin_unlock_bh(&sk->sk_receive_queue.lock);
		/* tp_packets reports delivered + dropped. */
		st.tp_packets += st.tp_drops;

		data = &st;
		break;
	case PACKET_AUXDATA:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->auxdata;

		data = &val;
		break;
	case PACKET_ORIGDEV:
		if (len > sizeof(int))
			len = sizeof(int);
		val = po->origdev;

		data = &val;
		break;
	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen))
		return -EFAULT;
	if (copy_to_user(optval, data, len))
		return -EFAULT;
	return 0;
}
1457
1458
/*
 *	Netdevice event handler: keeps every packet socket's protocol
 *	hook and multicast state consistent as devices go down, up, or
 *	unregister.  Runs under packet_sklist_lock (read side); per-socket
 *	hook state is protected by po->bind_lock.
 */
static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
{
	struct sock *sk;
	struct hlist_node *node;
	struct net_device *dev = data;

	/* Only the initial namespace is supported by this family. */
	if (dev->nd_net != &init_net)
		return NOTIFY_DONE;

	read_lock(&packet_sklist_lock);
	sk_for_each(sk, node, &packet_sklist) {
		struct packet_sock *po = pkt_sk(sk);

		switch (msg) {
		case NETDEV_UNREGISTER:
			if (po->mclist)
				packet_dev_mclist(dev, po->mclist, -1);
			/* fallthrough */

		case NETDEV_DOWN:
			if (dev->ifindex == po->ifindex) {
				spin_lock(&po->bind_lock);
				if (po->running) {
					__dev_remove_pack(&po->prot_hook);
					__sock_put(sk);
					po->running = 0;
					/* Tell the owner its device went away. */
					sk->sk_err = ENETDOWN;
					if (!sock_flag(sk, SOCK_DEAD))
						sk->sk_error_report(sk);
				}
				if (msg == NETDEV_UNREGISTER) {
					/* Device is gone for good: forget it. */
					po->ifindex = -1;
					po->prot_hook.dev = NULL;
				}
				spin_unlock(&po->bind_lock);
			}
			break;
		case NETDEV_UP:
			/* Re-arm sockets bound to this device by protocol. */
			spin_lock(&po->bind_lock);
			if (dev->ifindex == po->ifindex && po->num &&
			    !po->running) {
				dev_add_pack(&po->prot_hook);
				sock_hold(sk);
				po->running = 1;
			}
			spin_unlock(&po->bind_lock);
			break;
		}
	}
	read_unlock(&packet_sklist_lock);
	return NOTIFY_DONE;
}
1511
1512
/*
 *	ioctl handler: queue occupancy (SIOCOUTQ/SIOCINQ), timestamps,
 *	and — with CONFIG_INET — pass-through of routing/ARP/interface
 *	ioctls to the inet layer.
 */
static int packet_ioctl(struct socket *sock, unsigned int cmd,
			unsigned long arg)
{
	struct sock *sk = sock->sk;

	switch(cmd) {
	case SIOCOUTQ:
	{
		/* Bytes queued for transmit but not yet sent. */
		int amount = atomic_read(&sk->sk_wmem_alloc);
		return put_user(amount, (int __user *)arg);
	}
	case SIOCINQ:
	{
		/* Size of the next packet waiting to be read, if any. */
		struct sk_buff *skb;
		int amount = 0;

		spin_lock_bh(&sk->sk_receive_queue.lock);
		skb = skb_peek(&sk->sk_receive_queue);
		if (skb)
			amount = skb->len;
		spin_unlock_bh(&sk->sk_receive_queue.lock);
		return put_user(amount, (int __user *)arg);
	}
	case SIOCGSTAMP:
		return sock_get_timestamp(sk, (struct timeval __user *)arg);
	case SIOCGSTAMPNS:
		return sock_get_timestampns(sk, (struct timespec __user *)arg);

#ifdef CONFIG_INET
	case SIOCADDRT:
	case SIOCDELRT:
	case SIOCDARP:
	case SIOCGARP:
	case SIOCSARP:
	case SIOCGIFADDR:
	case SIOCSIFADDR:
	case SIOCGIFBRDADDR:
	case SIOCSIFBRDADDR:
	case SIOCGIFNETMASK:
	case SIOCSIFNETMASK:
	case SIOCGIFDSTADDR:
	case SIOCSIFDSTADDR:
	case SIOCSIFFLAGS:
		return inet_dgram_ops.ioctl(sock, cmd, arg);
#endif

	default:
		return -ENOIOCTLCMD;
	}
	return 0;
}
1564
1565#ifndef CONFIG_PACKET_MMAP
1566#define packet_mmap sock_no_mmap
1567#define packet_poll datagram_poll
1568#else
1569
/*
 *	poll() for mmap()ed sockets: in addition to the normal datagram
 *	poll state, report readable when the most recently filled ring
 *	frame has been handed to userspace (tp_status != TP_STATUS_KERNEL).
 */
static unsigned int packet_poll(struct file * file, struct socket *sock,
				poll_table *wait)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned int mask = datagram_poll(file, sock, wait);

	spin_lock_bh(&sk->sk_receive_queue.lock);
	if (po->pg_vec) {
		/* The frame before po->head is the last one the kernel wrote. */
		unsigned last = po->head ? po->head-1 : po->frame_max;
		struct tpacket_hdr *h;

		h = packet_lookup_frame(po, last);

		if (h->tp_status)
			mask |= POLLIN | POLLRDNORM;
	}
	spin_unlock_bh(&sk->sk_receive_queue.lock);
	return mask;
}
1590
1591
1592/* Dirty? Well, I still did not learn better way to account
1593 * for user mmaps.
1594 */
1595
1596static void packet_mm_open(struct vm_area_struct *vma)
1597{
1598 struct file *file = vma->vm_file;
Eric Dumazetb69aee02005-09-06 14:42:45 -07001599 struct socket * sock = file->private_data;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001600 struct sock *sk = sock->sk;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001601
Linus Torvalds1da177e2005-04-16 15:20:36 -07001602 if (sk)
1603 atomic_inc(&pkt_sk(sk)->mapped);
1604}
1605
1606static void packet_mm_close(struct vm_area_struct *vma)
1607{
1608 struct file *file = vma->vm_file;
Eric Dumazetb69aee02005-09-06 14:42:45 -07001609 struct socket * sock = file->private_data;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001610 struct sock *sk = sock->sk;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001611
Linus Torvalds1da177e2005-04-16 15:20:36 -07001612 if (sk)
1613 atomic_dec(&pkt_sk(sk)->mapped);
1614}
1615
/* VM callbacks that keep po->mapped in sync with live ring mappings. */
static struct vm_operations_struct packet_mmap_ops = {
	.open =	packet_mm_open,
	.close =packet_mm_close,
};
1620
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001621static void free_pg_vec(char **pg_vec, unsigned int order, unsigned int len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001622{
1623 int i;
1624
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001625 for (i = 0; i < len; i++) {
1626 if (likely(pg_vec[i]))
1627 free_pages((unsigned long) pg_vec[i], order);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001628 }
1629 kfree(pg_vec);
1630}
1631
David S. Miller4ebf0ae2005-12-06 16:38:35 -08001632static inline char *alloc_one_pg_vec_page(unsigned long order)
1633{
1634 return (char *) __get_free_pages(GFP_KERNEL | __GFP_COMP | __GFP_ZERO,
1635 order);
1636}
1637
/*
 *	Allocate the ring's block vector: tp_block_nr page groups of the
 *	given order.  On partial failure everything already allocated is
 *	freed and NULL is returned.
 */
static char **alloc_pg_vec(struct tpacket_req *req, int order)
{
	unsigned int block_nr = req->tp_block_nr;
	char **pg_vec;
	int i;

	pg_vec = kzalloc(block_nr * sizeof(char *), GFP_KERNEL);
	if (unlikely(!pg_vec))
		goto out;

	for (i = 0; i < block_nr; i++) {
		pg_vec[i] = alloc_one_pg_vec_page(order);
		if (unlikely(!pg_vec[i]))
			goto out_free_pgvec;
	}

out:
	return pg_vec;

out_free_pgvec:
	/* Unwind: free_pg_vec skips the NULL tail entries. */
	free_pg_vec(pg_vec, order, block_nr);
	pg_vec = NULL;
	goto out;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001662
/*
 *	Create or destroy the mmap()ed RX ring.  A request with
 *	tp_block_nr != 0 allocates and installs a ring; an all-zero
 *	request tears it down (@closing is set on socket release, which
 *	allows teardown even while userspace mappings still exist).
 *
 *	The socket is detached from the network (hook removed) around the
 *	swap so no receiver touches the ring while it is replaced; the
 *	hook is restored afterwards if it was running.
 */
static int packet_set_ring(struct sock *sk, struct tpacket_req *req, int closing)
{
	char **pg_vec = NULL;
	struct packet_sock *po = pkt_sk(sk);
	int was_running, order = 0;
	__be16 num;
	int err = 0;

	if (req->tp_block_nr) {
		int i, l;

		/* Sanity tests and some calculations */

		if (unlikely(po->pg_vec))
			return -EBUSY;

		if (unlikely((int)req->tp_block_size <= 0))
			return -EINVAL;
		if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
			return -EINVAL;
		if (unlikely(req->tp_frame_size < TPACKET_HDRLEN))
			return -EINVAL;
		if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
			return -EINVAL;

		po->frames_per_block = req->tp_block_size/req->tp_frame_size;
		if (unlikely(po->frames_per_block <= 0))
			return -EINVAL;
		/* Frame count must exactly tile the blocks. */
		if (unlikely((po->frames_per_block * req->tp_block_nr) !=
			     req->tp_frame_nr))
			return -EINVAL;

		err = -ENOMEM;
		order = get_order(req->tp_block_size);
		pg_vec = alloc_pg_vec(req, order);
		if (unlikely(!pg_vec))
			goto out;

		/* Mark every frame as owned by the kernel. */
		l = 0;
		for (i = 0; i < req->tp_block_nr; i++) {
			char *ptr = pg_vec[i];
			struct tpacket_hdr *header;
			int k;

			for (k = 0; k < po->frames_per_block; k++) {
				header = (struct tpacket_hdr *) ptr;
				header->tp_status = TP_STATUS_KERNEL;
				ptr += req->tp_frame_size;
			}
		}
		/* Done */
	} else {
		/* Teardown request must be fully zero. */
		if (unlikely(req->tp_frame_nr))
			return -EINVAL;
	}

	lock_sock(sk);

	/* Detach socket from network */
	spin_lock(&po->bind_lock);
	was_running = po->running;
	num = po->num;
	if (was_running) {
		__dev_remove_pack(&po->prot_hook);
		po->num = 0;
		po->running = 0;
		__sock_put(sk);
	}
	spin_unlock(&po->bind_lock);

	/* Wait for in-flight receivers to drain before swapping. */
	synchronize_net();

	err = -EBUSY;
	if (closing || atomic_read(&po->mapped) == 0) {
		err = 0;
/* XC: exchange — store b into a, yield a's old value. */
#define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })

		spin_lock_bh(&sk->sk_receive_queue.lock);
		pg_vec = XC(po->pg_vec, pg_vec);
		po->frame_max = (req->tp_frame_nr - 1);
		po->head = 0;
		po->frame_size = req->tp_frame_size;
		spin_unlock_bh(&sk->sk_receive_queue.lock);

		order = XC(po->pg_vec_order, order);
		req->tp_block_nr = XC(po->pg_vec_len, req->tp_block_nr);

		po->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
		/* Ring present => deliver via tpacket_rcv from now on. */
		po->prot_hook.func = po->pg_vec ? tpacket_rcv : packet_rcv;
		skb_queue_purge(&sk->sk_receive_queue);
#undef XC
		if (atomic_read(&po->mapped))
			printk(KERN_DEBUG "packet_mmap: vma is busy: %d\n", atomic_read(&po->mapped));
	}

	/* Re-attach the hook if it was running before. */
	spin_lock(&po->bind_lock);
	if (was_running && !po->running) {
		sock_hold(sk);
		po->running = 1;
		po->num = num;
		dev_add_pack(&po->prot_hook);
	}
	spin_unlock(&po->bind_lock);

	release_sock(sk);

	/* pg_vec now holds the OLD ring (or the unused new one on error). */
	if (pg_vec)
		free_pg_vec(pg_vec, order, req->tp_block_nr);
out:
	return err;
}
1774
/*
 *	Map the RX ring into userspace.  The vma must start at offset 0
 *	and cover the whole ring exactly; each ring page is inserted
 *	individually with vm_insert_page().
 */
static int packet_mmap(struct file *file, struct socket *sock, struct vm_area_struct *vma)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned long size;
	unsigned long start;
	int err = -EINVAL;
	int i;

	if (vma->vm_pgoff)
		return -EINVAL;

	size = vma->vm_end - vma->vm_start;

	lock_sock(sk);
	/* No ring configured, or the mapping size does not match it. */
	if (po->pg_vec == NULL)
		goto out;
	if (size != po->pg_vec_len*po->pg_vec_pages*PAGE_SIZE)
		goto out;

	start = vma->vm_start;
	for (i = 0; i < po->pg_vec_len; i++) {
		struct page *page = virt_to_page(po->pg_vec[i]);
		int pg_num;

		for (pg_num = 0; pg_num < po->pg_vec_pages; pg_num++, page++) {
			err = vm_insert_page(vma, start, page);
			if (unlikely(err))
				goto out;
			start += PAGE_SIZE;
		}
	}
	/* Mapping established: count it and install the open/close hooks. */
	atomic_inc(&po->mapped);
	vma->vm_ops = &packet_mmap_ops;
	err = 0;

out:
	release_sock(sk);
	return err;
}
1815#endif
1816
1817
Eric Dumazet90ddc4f2005-12-22 12:49:22 -08001818static const struct proto_ops packet_ops_spkt = {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001819 .family = PF_PACKET,
1820 .owner = THIS_MODULE,
1821 .release = packet_release,
1822 .bind = packet_bind_spkt,
1823 .connect = sock_no_connect,
1824 .socketpair = sock_no_socketpair,
1825 .accept = sock_no_accept,
1826 .getname = packet_getname_spkt,
1827 .poll = datagram_poll,
1828 .ioctl = packet_ioctl,
1829 .listen = sock_no_listen,
1830 .shutdown = sock_no_shutdown,
1831 .setsockopt = sock_no_setsockopt,
1832 .getsockopt = sock_no_getsockopt,
1833 .sendmsg = packet_sendmsg_spkt,
1834 .recvmsg = packet_recvmsg,
1835 .mmap = sock_no_mmap,
1836 .sendpage = sock_no_sendpage,
1837};
Linus Torvalds1da177e2005-04-16 15:20:36 -07001838
/*
 * proto_ops for modern AF_PACKET sockets (SOCK_RAW/SOCK_DGRAM): full
 * setsockopt/getsockopt support and packet_mmap for the rx ring.
 */
static const struct proto_ops packet_ops = {
	.family = PF_PACKET,
	.owner = THIS_MODULE,
	.release = packet_release,
	.bind = packet_bind,
	.connect = sock_no_connect,
	.socketpair = sock_no_socketpair,
	.accept = sock_no_accept,
	.getname = packet_getname,
	.poll = packet_poll,
	.ioctl = packet_ioctl,
	.listen = sock_no_listen,
	.shutdown = sock_no_shutdown,
	.setsockopt = packet_setsockopt,
	.getsockopt = packet_getsockopt,
	.sendmsg = packet_sendmsg,
	.recvmsg = packet_recvmsg,
	.mmap = packet_mmap,
	.sendpage = sock_no_sendpage,
};
1859
/* Registered with sock_register(); routes socket(PF_PACKET, ...) calls
 * to packet_create(). */
static struct net_proto_family packet_family_ops = {
	.family = PF_PACKET,
	.create = packet_create,
	.owner = THIS_MODULE,
};
1865
1866static struct notifier_block packet_netdev_notifier = {
1867 .notifier_call =packet_notifier,
1868};
1869
1870#ifdef CONFIG_PROC_FS
1871static inline struct sock *packet_seq_idx(loff_t off)
1872{
1873 struct sock *s;
1874 struct hlist_node *node;
1875
1876 sk_for_each(s, node, &packet_sklist) {
1877 if (!off--)
1878 return s;
1879 }
1880 return NULL;
1881}
1882
1883static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
1884{
1885 read_lock(&packet_sklist_lock);
1886 return *pos ? packet_seq_idx(*pos - 1) : SEQ_START_TOKEN;
1887}
1888
1889static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1890{
1891 ++*pos;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001892 return (v == SEQ_START_TOKEN)
1893 ? sk_head(&packet_sklist)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001894 : sk_next((struct sock*)v) ;
1895}
1896
/* seq_file ->stop: drop the list lock taken in packet_seq_start(). */
static void packet_seq_stop(struct seq_file *seq, void *v)
{
	read_unlock(&packet_sklist_lock);
}
1901
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001902static int packet_seq_show(struct seq_file *seq, void *v)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001903{
1904 if (v == SEQ_START_TOKEN)
1905 seq_puts(seq, "sk RefCnt Type Proto Iface R Rmem User Inode\n");
1906 else {
1907 struct sock *s = v;
1908 const struct packet_sock *po = pkt_sk(s);
1909
1910 seq_printf(seq,
1911 "%p %-6d %-4d %04x %-5d %1d %-6u %-6u %-6lu\n",
1912 s,
1913 atomic_read(&s->sk_refcnt),
1914 s->sk_type,
1915 ntohs(po->num),
1916 po->ifindex,
1917 po->running,
1918 atomic_read(&s->sk_rmem_alloc),
1919 sock_i_uid(s),
1920 sock_i_ino(s) );
1921 }
1922
1923 return 0;
1924}
1925
/* Iterator callbacks backing /proc/net/packet. */
static const struct seq_operations packet_seq_ops = {
	.start = packet_seq_start,
	.next = packet_seq_next,
	.stop = packet_seq_stop,
	.show = packet_seq_show,
};
1932
/* ->open for /proc/net/packet: attach the packet_seq_ops iterator. */
static int packet_seq_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &packet_seq_ops);
}
1937
/* File operations for /proc/net/packet; read side is the generic
 * seq_file machinery. */
static const struct file_operations packet_seq_fops = {
	.owner = THIS_MODULE,
	.open = packet_seq_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = seq_release,
};
1945
1946#endif
1947
/* Module unload: tear everything down in reverse order of packet_init()
 * — proc entry first so no new readers appear, protocol family last. */
static void __exit packet_exit(void)
{
	proc_net_remove(&init_net, "packet");
	unregister_netdevice_notifier(&packet_netdev_notifier);
	sock_unregister(PF_PACKET);
	proto_unregister(&packet_proto);
}
1955
1956static int __init packet_init(void)
1957{
1958 int rc = proto_register(&packet_proto, 0);
1959
1960 if (rc != 0)
1961 goto out;
1962
1963 sock_register(&packet_family_ops);
1964 register_netdevice_notifier(&packet_netdev_notifier);
Eric W. Biederman457c4cb2007-09-12 12:01:34 +02001965 proc_net_fops_create(&init_net, "packet", 0, &packet_seq_fops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001966out:
1967 return rc;
1968}
1969
1970module_init(packet_init);
1971module_exit(packet_exit);
1972MODULE_LICENSE("GPL");
1973MODULE_ALIAS_NETPROTO(PF_PACKET);