blob: 26fbeb140a6a162d4866cc68c0fab917c8502dcd [file] [log] [blame]
Linus Torvalds1da177e2005-04-16 15:20:36 -07001/*
2 * INET An implementation of the TCP/IP protocol suite for the LINUX
3 * operating system. INET is implemented using the BSD Socket
4 * interface as the means of communication with the user level.
5 *
6 * PACKET - implements raw packet sockets.
7 *
Jesper Juhl02c30a82005-05-05 16:16:16 -07008 * Authors: Ross Biro
Linus Torvalds1da177e2005-04-16 15:20:36 -07009 * Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
10 * Alan Cox, <gw4pts@gw4pts.ampr.org>
11 *
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090012 * Fixes:
Linus Torvalds1da177e2005-04-16 15:20:36 -070013 * Alan Cox : verify_area() now used correctly
14 * Alan Cox : new skbuff lists, look ma no backlogs!
15 * Alan Cox : tidied skbuff lists.
16 * Alan Cox : Now uses generic datagram routines I
17 * added. Also fixed the peek/read crash
18 * from all old Linux datagram code.
19 * Alan Cox : Uses the improved datagram code.
20 * Alan Cox : Added NULL's for socket options.
21 * Alan Cox : Re-commented the code.
22 * Alan Cox : Use new kernel side addressing
23 * Rob Janssen : Correct MTU usage.
24 * Dave Platt : Counter leaks caused by incorrect
25 * interrupt locking and some slightly
26 * dubious gcc output. Can you read
27 * compiler: it said _VOLATILE_
28 * Richard Kooijman : Timestamp fixes.
29 * Alan Cox : New buffers. Use sk->mac.raw.
30 * Alan Cox : sendmsg/recvmsg support.
31 * Alan Cox : Protocol setting support
32 * Alexey Kuznetsov : Untied from IPv4 stack.
33 * Cyrus Durgin : Fixed kerneld for kmod.
34 * Michal Ostrowski : Module initialization cleanup.
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090035 * Ulises Alonso : Frame number limit removal and
Linus Torvalds1da177e2005-04-16 15:20:36 -070036 * packet_set_ring memory leak.
Eric W. Biederman0fb375f2005-09-21 00:11:37 -070037 * Eric Biederman : Allow for > 8 byte hardware addresses.
38 * The convention is that longer addresses
39 * will simply extend the hardware address
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090040 * byte arrays at the end of sockaddr_ll
Eric W. Biederman0fb375f2005-09-21 00:11:37 -070041 * and packet_mreq.
Johann Baudy69e3c752009-05-18 22:11:22 -070042 * Johann Baudy : Added TX RING.
Linus Torvalds1da177e2005-04-16 15:20:36 -070043 *
44 * This program is free software; you can redistribute it and/or
45 * modify it under the terms of the GNU General Public License
46 * as published by the Free Software Foundation; either version
47 * 2 of the License, or (at your option) any later version.
48 *
49 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +090050
Linus Torvalds1da177e2005-04-16 15:20:36 -070051#include <linux/types.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070052#include <linux/mm.h>
Randy Dunlap4fc268d2006-01-11 12:17:47 -080053#include <linux/capability.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070054#include <linux/fcntl.h>
55#include <linux/socket.h>
56#include <linux/in.h>
57#include <linux/inet.h>
58#include <linux/netdevice.h>
59#include <linux/if_packet.h>
60#include <linux/wireless.h>
Herbert Xuffbc6112007-02-04 23:33:10 -080061#include <linux/kernel.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070062#include <linux/kmod.h>
Tejun Heo5a0e3ad2010-03-24 17:04:11 +090063#include <linux/slab.h>
Neil Horman0e3125c2010-11-16 10:26:47 -080064#include <linux/vmalloc.h>
Eric W. Biederman457c4cb2007-09-12 12:01:34 +020065#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070066#include <net/ip.h>
67#include <net/protocol.h>
68#include <linux/skbuff.h>
69#include <net/sock.h>
70#include <linux/errno.h>
71#include <linux/timer.h>
72#include <asm/system.h>
73#include <asm/uaccess.h>
74#include <asm/ioctls.h>
75#include <asm/page.h>
Al Viroa1f8e7f72006-10-19 16:08:53 -040076#include <asm/cacheflush.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070077#include <asm/io.h>
78#include <linux/proc_fs.h>
79#include <linux/seq_file.h>
80#include <linux/poll.h>
81#include <linux/module.h>
82#include <linux/init.h>
Herbert Xu905db442009-01-30 14:12:06 -080083#include <linux/mutex.h>
Eric Dumazet05423b22009-10-26 18:40:35 -070084#include <linux/if_vlan.h>
Sridhar Samudralabfd5f4a2010-02-04 20:24:10 -080085#include <linux/virtio_net.h>
Richard Cochraned85b562010-04-07 22:41:28 +000086#include <linux/errqueue.h>
Scott McMillan614f60f2010-06-02 05:53:56 -070087#include <linux/net_tstamp.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070088
89#ifdef CONFIG_INET
90#include <net/inet_common.h>
91#endif
92
/*
   Assumptions:
   - if device has no dev->hard_header routine, it adds and removes ll header
     inside itself. In this case ll header is invisible outside of device,
     but higher levels still should reserve dev->hard_header_len.
     Some devices are clever enough to reallocate skb, when header
     will not fit to reserved space (tunnel), others are silly
     (PPP).
   - packet socket receives packets with pulled ll header,
     so that SOCK_RAW should push it back.

On receive:
-----------

Incoming, dev->hard_header!=NULL
   mac_header -> ll header
   data       -> data

Outgoing, dev->hard_header!=NULL
   mac_header -> ll header
   data       -> ll header

Incoming, dev->hard_header==NULL
   mac_header -> UNKNOWN position. It is very likely, that it points to ll
		 header.  PPP does this, which is wrong, because it introduces
		 asymmetry between rx and tx paths.
   data       -> data

Outgoing, dev->hard_header==NULL
   mac_header -> data. ll header is still not built!
   data       -> data

Resume
  If dev->hard_header==NULL we are unlikely to restore a sensible ll header.


On transmit:
------------

dev->hard_header != NULL
   mac_header -> ll header
   data       -> ll header

dev->hard_header == NULL (ll header is added by device, we cannot control it)
   mac_header -> data
   data       -> data

   We should set nh.raw on output to the correct position,
   the packet classifier depends on it.
 */
143
Linus Torvalds1da177e2005-04-16 15:20:36 -0700144/* Private packet socket structures. */
145
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000146struct packet_mclist {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700147 struct packet_mclist *next;
148 int ifindex;
149 int count;
150 unsigned short type;
151 unsigned short alen;
Eric W. Biederman0fb375f2005-09-21 00:11:37 -0700152 unsigned char addr[MAX_ADDR_LEN];
153};
154/* identical to struct packet_mreq except it has
155 * a longer address field.
156 */
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000157struct packet_mreq_max {
Eric W. Biederman0fb375f2005-09-21 00:11:37 -0700158 int mr_ifindex;
159 unsigned short mr_type;
160 unsigned short mr_alen;
161 unsigned char mr_address[MAX_ADDR_LEN];
Linus Torvalds1da177e2005-04-16 15:20:36 -0700162};
David S. Millera2efcfa2007-05-29 13:12:50 -0700163
Johann Baudy69e3c752009-05-18 22:11:22 -0700164static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
165 int closing, int tx_ring);
166
Neil Horman0e3125c2010-11-16 10:26:47 -0800167#define PGV_FROM_VMALLOC 1
168struct pgv {
169 char *buffer;
170 unsigned char flags;
171};
172
Johann Baudy69e3c752009-05-18 22:11:22 -0700173struct packet_ring_buffer {
Neil Horman0e3125c2010-11-16 10:26:47 -0800174 struct pgv *pg_vec;
Johann Baudy69e3c752009-05-18 22:11:22 -0700175 unsigned int head;
176 unsigned int frames_per_block;
177 unsigned int frame_size;
178 unsigned int frame_max;
179
180 unsigned int pg_vec_order;
181 unsigned int pg_vec_pages;
182 unsigned int pg_vec_len;
183
184 atomic_t pending;
185};
186
187struct packet_sock;
188static int tpacket_snd(struct packet_sock *po, struct msghdr *msg);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700189
190static void packet_flush_mclist(struct sock *sk);
191
192struct packet_sock {
193 /* struct sock has to be the first member of packet_sock */
194 struct sock sk;
195 struct tpacket_stats stats;
Johann Baudy69e3c752009-05-18 22:11:22 -0700196 struct packet_ring_buffer rx_ring;
197 struct packet_ring_buffer tx_ring;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700198 int copy_thresh;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700199 spinlock_t bind_lock;
Herbert Xu905db442009-01-30 14:12:06 -0800200 struct mutex pg_vec_lock;
Herbert Xu8dc41942007-02-04 23:31:32 -0800201 unsigned int running:1, /* prot_hook is attached*/
Peter P. Waskiewicz Jr80feaac2007-04-20 16:05:39 -0700202 auxdata:1,
Sridhar Samudralabfd5f4a2010-02-04 20:24:10 -0800203 origdev:1,
204 has_vnet_hdr:1;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700205 int ifindex; /* bound device */
Al Viro0e11c912006-11-08 00:26:29 -0800206 __be16 num;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700207 struct packet_mclist *mclist;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700208 atomic_t mapped;
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700209 enum tpacket_versions tp_version;
210 unsigned int tp_hdrlen;
Patrick McHardy8913336a2008-07-18 18:05:19 -0700211 unsigned int tp_reserve;
Johann Baudy69e3c752009-05-18 22:11:22 -0700212 unsigned int tp_loss:1;
Scott McMillan614f60f2010-06-02 05:53:56 -0700213 unsigned int tp_tstamp;
Eric Dumazet94b05952009-10-16 04:02:20 +0000214 struct packet_type prot_hook ____cacheline_aligned_in_smp;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700215};
216
Herbert Xuffbc6112007-02-04 23:33:10 -0800217struct packet_skb_cb {
218 unsigned int origlen;
219 union {
220 struct sockaddr_pkt pkt;
221 struct sockaddr_ll ll;
222 } sa;
223};
224
225#define PACKET_SKB_CB(__skb) ((struct packet_skb_cb *)((__skb)->cb))
Herbert Xu8dc41942007-02-04 23:31:32 -0800226
Changli Gao0af55bb2010-12-01 02:52:20 +0000227static inline struct page *pgv_to_page(void *addr)
228{
229 if (is_vmalloc_addr(addr))
230 return vmalloc_to_page(addr);
231 return virt_to_page(addr);
232}
233
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700234static void __packet_set_status(struct packet_sock *po, void *frame, int status)
235{
236 union {
237 struct tpacket_hdr *h1;
238 struct tpacket2_hdr *h2;
239 void *raw;
240 } h;
241
242 h.raw = frame;
243 switch (po->tp_version) {
244 case TPACKET_V1:
245 h.h1->tp_status = status;
Changli Gao0af55bb2010-12-01 02:52:20 +0000246 flush_dcache_page(pgv_to_page(&h.h1->tp_status));
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700247 break;
248 case TPACKET_V2:
249 h.h2->tp_status = status;
Changli Gao0af55bb2010-12-01 02:52:20 +0000250 flush_dcache_page(pgv_to_page(&h.h2->tp_status));
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700251 break;
Johann Baudy69e3c752009-05-18 22:11:22 -0700252 default:
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000253 pr_err("TPACKET version not supported\n");
Johann Baudy69e3c752009-05-18 22:11:22 -0700254 BUG();
255 }
256
257 smp_wmb();
258}
259
260static int __packet_get_status(struct packet_sock *po, void *frame)
261{
262 union {
263 struct tpacket_hdr *h1;
264 struct tpacket2_hdr *h2;
265 void *raw;
266 } h;
267
268 smp_rmb();
269
270 h.raw = frame;
271 switch (po->tp_version) {
272 case TPACKET_V1:
Changli Gao0af55bb2010-12-01 02:52:20 +0000273 flush_dcache_page(pgv_to_page(&h.h1->tp_status));
Johann Baudy69e3c752009-05-18 22:11:22 -0700274 return h.h1->tp_status;
275 case TPACKET_V2:
Changli Gao0af55bb2010-12-01 02:52:20 +0000276 flush_dcache_page(pgv_to_page(&h.h2->tp_status));
Johann Baudy69e3c752009-05-18 22:11:22 -0700277 return h.h2->tp_status;
278 default:
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000279 pr_err("TPACKET version not supported\n");
Johann Baudy69e3c752009-05-18 22:11:22 -0700280 BUG();
281 return 0;
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700282 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700283}
Johann Baudy69e3c752009-05-18 22:11:22 -0700284
285static void *packet_lookup_frame(struct packet_sock *po,
286 struct packet_ring_buffer *rb,
287 unsigned int position,
288 int status)
289{
290 unsigned int pg_vec_pos, frame_offset;
291 union {
292 struct tpacket_hdr *h1;
293 struct tpacket2_hdr *h2;
294 void *raw;
295 } h;
296
297 pg_vec_pos = position / rb->frames_per_block;
298 frame_offset = position % rb->frames_per_block;
299
Neil Horman0e3125c2010-11-16 10:26:47 -0800300 h.raw = rb->pg_vec[pg_vec_pos].buffer +
301 (frame_offset * rb->frame_size);
Johann Baudy69e3c752009-05-18 22:11:22 -0700302
303 if (status != __packet_get_status(po, h.raw))
304 return NULL;
305
306 return h.raw;
307}
308
309static inline void *packet_current_frame(struct packet_sock *po,
310 struct packet_ring_buffer *rb,
311 int status)
312{
313 return packet_lookup_frame(po, rb, rb->head, status);
314}
315
316static inline void *packet_previous_frame(struct packet_sock *po,
317 struct packet_ring_buffer *rb,
318 int status)
319{
320 unsigned int previous = rb->head ? rb->head - 1 : rb->frame_max;
321 return packet_lookup_frame(po, rb, previous, status);
322}
323
324static inline void packet_increment_head(struct packet_ring_buffer *buff)
325{
326 buff->head = buff->head != buff->frame_max ? buff->head+1 : 0;
327}
328
Linus Torvalds1da177e2005-04-16 15:20:36 -0700329static inline struct packet_sock *pkt_sk(struct sock *sk)
330{
331 return (struct packet_sock *)sk;
332}
333
334static void packet_sock_destruct(struct sock *sk)
335{
Richard Cochraned85b562010-04-07 22:41:28 +0000336 skb_queue_purge(&sk->sk_error_queue);
337
Ilpo Järvinen547b7922008-07-25 21:43:18 -0700338 WARN_ON(atomic_read(&sk->sk_rmem_alloc));
339 WARN_ON(atomic_read(&sk->sk_wmem_alloc));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700340
341 if (!sock_flag(sk, SOCK_DEAD)) {
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000342 pr_err("Attempt to release alive packet socket: %p\n", sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700343 return;
344 }
345
Pavel Emelyanov17ab56a2007-11-10 21:38:48 -0800346 sk_refcnt_debug_dec(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700347}
348
349
Eric Dumazet90ddc4f2005-12-22 12:49:22 -0800350static const struct proto_ops packet_ops;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700351
Eric Dumazet90ddc4f2005-12-22 12:49:22 -0800352static const struct proto_ops packet_ops_spkt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700353
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000354static int packet_rcv_spkt(struct sk_buff *skb, struct net_device *dev,
355 struct packet_type *pt, struct net_device *orig_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700356{
357 struct sock *sk;
358 struct sockaddr_pkt *spkt;
359
360 /*
361 * When we registered the protocol we saved the socket in the data
362 * field for just this event.
363 */
364
365 sk = pt->af_packet_priv;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900366
Linus Torvalds1da177e2005-04-16 15:20:36 -0700367 /*
368 * Yank back the headers [hope the device set this
369 * right or kerboom...]
370 *
371 * Incoming packets have ll header pulled,
372 * push it back.
373 *
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -0700374 * For outgoing ones skb->data == skb_mac_header(skb)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700375 * so that this procedure is noop.
376 */
377
378 if (skb->pkt_type == PACKET_LOOPBACK)
379 goto out;
380
Octavian Purdila09ad9bc2009-11-25 15:14:13 -0800381 if (!net_eq(dev_net(dev), sock_net(sk)))
Denis V. Lunevd12d01d2007-11-19 22:28:35 -0800382 goto out;
383
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000384 skb = skb_share_check(skb, GFP_ATOMIC);
385 if (skb == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700386 goto oom;
387
388 /* drop any routing info */
Eric Dumazetadf30902009-06-02 05:19:30 +0000389 skb_dst_drop(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700390
Phil Oester84531c22005-07-12 11:57:52 -0700391 /* drop conntrack reference */
392 nf_reset(skb);
393
Herbert Xuffbc6112007-02-04 23:33:10 -0800394 spkt = &PACKET_SKB_CB(skb)->sa.pkt;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700395
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -0700396 skb_push(skb, skb->data - skb_mac_header(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700397
398 /*
399 * The SOCK_PACKET socket receives _all_ frames.
400 */
401
402 spkt->spkt_family = dev->type;
403 strlcpy(spkt->spkt_device, dev->name, sizeof(spkt->spkt_device));
404 spkt->spkt_protocol = skb->protocol;
405
406 /*
407 * Charge the memory to the socket. This is done specifically
408 * to prevent sockets using all the memory up.
409 */
410
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000411 if (sock_queue_rcv_skb(sk, skb) == 0)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700412 return 0;
413
414out:
415 kfree_skb(skb);
416oom:
417 return 0;
418}
419
420
421/*
422 * Output a raw packet to a device layer. This bypasses all the other
423 * protocol layers and you must therefore supply it with a complete frame
424 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900425
Linus Torvalds1da177e2005-04-16 15:20:36 -0700426static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
427 struct msghdr *msg, size_t len)
428{
429 struct sock *sk = sock->sk;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000430 struct sockaddr_pkt *saddr = (struct sockaddr_pkt *)msg->msg_name;
Eric Dumazet1a35ca82009-12-15 05:47:03 +0000431 struct sk_buff *skb = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700432 struct net_device *dev;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000433 __be16 proto = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700434 int err;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900435
Linus Torvalds1da177e2005-04-16 15:20:36 -0700436 /*
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900437 * Get and verify the address.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700438 */
439
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000440 if (saddr) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700441 if (msg->msg_namelen < sizeof(struct sockaddr))
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000442 return -EINVAL;
443 if (msg->msg_namelen == sizeof(struct sockaddr_pkt))
444 proto = saddr->spkt_protocol;
445 } else
446 return -ENOTCONN; /* SOCK_PACKET must be sent giving an address */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700447
448 /*
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900449 * Find the device first to size check it
Linus Torvalds1da177e2005-04-16 15:20:36 -0700450 */
451
452 saddr->spkt_device[13] = 0;
Eric Dumazet1a35ca82009-12-15 05:47:03 +0000453retry:
Eric Dumazet654d1f82009-11-02 10:43:32 +0100454 rcu_read_lock();
455 dev = dev_get_by_name_rcu(sock_net(sk), saddr->spkt_device);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700456 err = -ENODEV;
457 if (dev == NULL)
458 goto out_unlock;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900459
David S. Millerd5e76b02007-01-25 19:30:36 -0800460 err = -ENETDOWN;
461 if (!(dev->flags & IFF_UP))
462 goto out_unlock;
463
Linus Torvalds1da177e2005-04-16 15:20:36 -0700464 /*
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000465 * You may not queue a frame bigger than the mtu. This is the lowest level
466 * raw protocol and you must do your own fragmentation at this level.
Linus Torvalds1da177e2005-04-16 15:20:36 -0700467 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900468
Linus Torvalds1da177e2005-04-16 15:20:36 -0700469 err = -EMSGSIZE;
Kris Katterjohn8ae55f02006-01-23 16:28:02 -0800470 if (len > dev->mtu + dev->hard_header_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700471 goto out_unlock;
472
Eric Dumazet1a35ca82009-12-15 05:47:03 +0000473 if (!skb) {
474 size_t reserved = LL_RESERVED_SPACE(dev);
475 unsigned int hhlen = dev->header_ops ? dev->hard_header_len : 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700476
Eric Dumazet1a35ca82009-12-15 05:47:03 +0000477 rcu_read_unlock();
478 skb = sock_wmalloc(sk, len + reserved, 0, GFP_KERNEL);
479 if (skb == NULL)
480 return -ENOBUFS;
481 /* FIXME: Save some space for broken drivers that write a hard
482 * header at transmission time by themselves. PPP is the notable
483 * one here. This should really be fixed at the driver level.
484 */
485 skb_reserve(skb, reserved);
486 skb_reset_network_header(skb);
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900487
Eric Dumazet1a35ca82009-12-15 05:47:03 +0000488 /* Try to align data part correctly */
489 if (hhlen) {
490 skb->data -= hhlen;
491 skb->tail -= hhlen;
492 if (len < hhlen)
493 skb_reset_network_header(skb);
494 }
495 err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
496 if (err)
497 goto out_free;
498 goto retry;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700499 }
500
Eric Dumazet1a35ca82009-12-15 05:47:03 +0000501
Linus Torvalds1da177e2005-04-16 15:20:36 -0700502 skb->protocol = proto;
503 skb->dev = dev;
504 skb->priority = sk->sk_priority;
Eric Dumazet2d37a182009-10-01 19:14:46 +0000505 skb->mark = sk->sk_mark;
Oliver Hartkopp2244d072010-08-17 08:59:14 +0000506 err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
Richard Cochraned85b562010-04-07 22:41:28 +0000507 if (err < 0)
508 goto out_unlock;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700509
510 dev_queue_xmit(skb);
Eric Dumazet654d1f82009-11-02 10:43:32 +0100511 rcu_read_unlock();
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000512 return len;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700513
Linus Torvalds1da177e2005-04-16 15:20:36 -0700514out_unlock:
Eric Dumazet654d1f82009-11-02 10:43:32 +0100515 rcu_read_unlock();
Eric Dumazet1a35ca82009-12-15 05:47:03 +0000516out_free:
517 kfree_skb(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700518 return err;
519}
Linus Torvalds1da177e2005-04-16 15:20:36 -0700520
David S. Millerdbcb5852007-01-24 15:21:02 -0800521static inline unsigned int run_filter(struct sk_buff *skb, struct sock *sk,
522 unsigned int res)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700523{
524 struct sk_filter *filter;
525
Dmitry Mishinfda9ef52006-08-31 15:28:39 -0700526 rcu_read_lock_bh();
Paul E. McKenneya898def2010-02-22 17:04:49 -0800527 filter = rcu_dereference_bh(sk->sk_filter);
David S. Millerdbcb5852007-01-24 15:21:02 -0800528 if (filter != NULL)
Eric Dumazet93aaae22010-11-19 09:49:59 -0800529 res = sk_run_filter(skb, filter->insns);
Dmitry Mishinfda9ef52006-08-31 15:28:39 -0700530 rcu_read_unlock_bh();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700531
David S. Millerdbcb5852007-01-24 15:21:02 -0800532 return res;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700533}
534
535/*
536 This function makes lazy skb cloning in hope that most of packets
537 are discarded by BPF.
538
539 Note tricky part: we DO mangle shared skb! skb->data, skb->len
540 and skb->cb are mangled. It works because (and until) packets
541 falling here are owned by current CPU. Output packets are cloned
542 by dev_queue_xmit_nit(), input packets are processed by net_bh
543 sequencially, so that if we return skb to original state on exit,
544 we will not harm anyone.
545 */
546
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000547static int packet_rcv(struct sk_buff *skb, struct net_device *dev,
548 struct packet_type *pt, struct net_device *orig_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700549{
550 struct sock *sk;
551 struct sockaddr_ll *sll;
552 struct packet_sock *po;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000553 u8 *skb_head = skb->data;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700554 int skb_len = skb->len;
David S. Millerdbcb5852007-01-24 15:21:02 -0800555 unsigned int snaplen, res;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700556
557 if (skb->pkt_type == PACKET_LOOPBACK)
558 goto drop;
559
560 sk = pt->af_packet_priv;
561 po = pkt_sk(sk);
562
Octavian Purdila09ad9bc2009-11-25 15:14:13 -0800563 if (!net_eq(dev_net(dev), sock_net(sk)))
Denis V. Lunevd12d01d2007-11-19 22:28:35 -0800564 goto drop;
565
Linus Torvalds1da177e2005-04-16 15:20:36 -0700566 skb->dev = dev;
567
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -0700568 if (dev->header_ops) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700569 /* The device has an explicit notion of ll header,
570 exported to higher levels.
571
572 Otherwise, the device hides datails of it frame
573 structure, so that corresponding packet head
574 never delivered to user.
575 */
576 if (sk->sk_type != SOCK_DGRAM)
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -0700577 skb_push(skb, skb->data - skb_mac_header(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700578 else if (skb->pkt_type == PACKET_OUTGOING) {
579 /* Special case: outgoing packets have ll header at head */
Arnaldo Carvalho de Melobbe735e2007-03-10 22:16:10 -0300580 skb_pull(skb, skb_network_offset(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700581 }
582 }
583
584 snaplen = skb->len;
585
David S. Millerdbcb5852007-01-24 15:21:02 -0800586 res = run_filter(skb, sk, snaplen);
587 if (!res)
Dmitry Mishinfda9ef52006-08-31 15:28:39 -0700588 goto drop_n_restore;
David S. Millerdbcb5852007-01-24 15:21:02 -0800589 if (snaplen > res)
590 snaplen = res;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700591
592 if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >=
593 (unsigned)sk->sk_rcvbuf)
594 goto drop_n_acct;
595
596 if (skb_shared(skb)) {
597 struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC);
598 if (nskb == NULL)
599 goto drop_n_acct;
600
601 if (skb_head != skb->data) {
602 skb->data = skb_head;
603 skb->len = skb_len;
604 }
605 kfree_skb(skb);
606 skb = nskb;
607 }
608
Herbert Xuffbc6112007-02-04 23:33:10 -0800609 BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
610 sizeof(skb->cb));
611
612 sll = &PACKET_SKB_CB(skb)->sa.ll;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700613 sll->sll_family = AF_PACKET;
614 sll->sll_hatype = dev->type;
615 sll->sll_protocol = skb->protocol;
616 sll->sll_pkttype = skb->pkt_type;
Peter P Waskiewicz Jr8032b462007-11-10 22:03:25 -0800617 if (unlikely(po->origdev))
Peter P. Waskiewicz Jr80feaac2007-04-20 16:05:39 -0700618 sll->sll_ifindex = orig_dev->ifindex;
619 else
620 sll->sll_ifindex = dev->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700621
Stephen Hemmingerb95cce32007-09-26 22:13:38 -0700622 sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700623
Herbert Xuffbc6112007-02-04 23:33:10 -0800624 PACKET_SKB_CB(skb)->origlen = skb->len;
Herbert Xu8dc41942007-02-04 23:31:32 -0800625
Linus Torvalds1da177e2005-04-16 15:20:36 -0700626 if (pskb_trim(skb, snaplen))
627 goto drop_n_acct;
628
629 skb_set_owner_r(skb, sk);
630 skb->dev = NULL;
Eric Dumazetadf30902009-06-02 05:19:30 +0000631 skb_dst_drop(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700632
Phil Oester84531c22005-07-12 11:57:52 -0700633 /* drop conntrack reference */
634 nf_reset(skb);
635
Linus Torvalds1da177e2005-04-16 15:20:36 -0700636 spin_lock(&sk->sk_receive_queue.lock);
637 po->stats.tp_packets++;
Neil Horman3b885782009-10-12 13:26:31 -0700638 skb->dropcount = atomic_read(&sk->sk_drops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700639 __skb_queue_tail(&sk->sk_receive_queue, skb);
640 spin_unlock(&sk->sk_receive_queue.lock);
641 sk->sk_data_ready(sk, skb->len);
642 return 0;
643
644drop_n_acct:
Neil Horman3b885782009-10-12 13:26:31 -0700645 po->stats.tp_drops = atomic_inc_return(&sk->sk_drops);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700646
647drop_n_restore:
648 if (skb_head != skb->data && skb_shared(skb)) {
649 skb->data = skb_head;
650 skb->len = skb_len;
651 }
652drop:
Neil Hormanead2ceb2009-03-11 09:49:55 +0000653 consume_skb(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700654 return 0;
655}
656
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000657static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
658 struct packet_type *pt, struct net_device *orig_dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700659{
660 struct sock *sk;
661 struct packet_sock *po;
662 struct sockaddr_ll *sll;
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700663 union {
664 struct tpacket_hdr *h1;
665 struct tpacket2_hdr *h2;
666 void *raw;
667 } h;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +0000668 u8 *skb_head = skb->data;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700669 int skb_len = skb->len;
David S. Millerdbcb5852007-01-24 15:21:02 -0800670 unsigned int snaplen, res;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700671 unsigned long status = TP_STATUS_LOSING|TP_STATUS_USER;
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700672 unsigned short macoff, netoff, hdrlen;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700673 struct sk_buff *copy_skb = NULL;
Eric Dumazetb7aa0bf2007-04-19 16:16:32 -0700674 struct timeval tv;
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700675 struct timespec ts;
Scott McMillan614f60f2010-06-02 05:53:56 -0700676 struct skb_shared_hwtstamps *shhwtstamps = skb_hwtstamps(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700677
678 if (skb->pkt_type == PACKET_LOOPBACK)
679 goto drop;
680
681 sk = pt->af_packet_priv;
682 po = pkt_sk(sk);
683
Octavian Purdila09ad9bc2009-11-25 15:14:13 -0800684 if (!net_eq(dev_net(dev), sock_net(sk)))
Denis V. Lunevd12d01d2007-11-19 22:28:35 -0800685 goto drop;
686
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -0700687 if (dev->header_ops) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700688 if (sk->sk_type != SOCK_DGRAM)
Arnaldo Carvalho de Melo98e399f2007-03-19 15:33:04 -0700689 skb_push(skb, skb->data - skb_mac_header(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700690 else if (skb->pkt_type == PACKET_OUTGOING) {
691 /* Special case: outgoing packets have ll header at head */
Arnaldo Carvalho de Melobbe735e2007-03-10 22:16:10 -0300692 skb_pull(skb, skb_network_offset(skb));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700693 }
694 }
695
Herbert Xu8dc41942007-02-04 23:31:32 -0800696 if (skb->ip_summed == CHECKSUM_PARTIAL)
697 status |= TP_STATUS_CSUMNOTREADY;
698
Linus Torvalds1da177e2005-04-16 15:20:36 -0700699 snaplen = skb->len;
700
David S. Millerdbcb5852007-01-24 15:21:02 -0800701 res = run_filter(skb, sk, snaplen);
702 if (!res)
Dmitry Mishinfda9ef52006-08-31 15:28:39 -0700703 goto drop_n_restore;
David S. Millerdbcb5852007-01-24 15:21:02 -0800704 if (snaplen > res)
705 snaplen = res;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700706
707 if (sk->sk_type == SOCK_DGRAM) {
Patrick McHardy8913336a2008-07-18 18:05:19 -0700708 macoff = netoff = TPACKET_ALIGN(po->tp_hdrlen) + 16 +
709 po->tp_reserve;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700710 } else {
Arnaldo Carvalho de Melobbe735e2007-03-10 22:16:10 -0300711 unsigned maclen = skb_network_offset(skb);
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700712 netoff = TPACKET_ALIGN(po->tp_hdrlen +
Patrick McHardy8913336a2008-07-18 18:05:19 -0700713 (maclen < 16 ? 16 : maclen)) +
714 po->tp_reserve;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700715 macoff = netoff - maclen;
716 }
717
Johann Baudy69e3c752009-05-18 22:11:22 -0700718 if (macoff + snaplen > po->rx_ring.frame_size) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700719 if (po->copy_thresh &&
720 atomic_read(&sk->sk_rmem_alloc) + skb->truesize <
721 (unsigned)sk->sk_rcvbuf) {
722 if (skb_shared(skb)) {
723 copy_skb = skb_clone(skb, GFP_ATOMIC);
724 } else {
725 copy_skb = skb_get(skb);
726 skb_head = skb->data;
727 }
728 if (copy_skb)
729 skb_set_owner_r(copy_skb, sk);
730 }
Johann Baudy69e3c752009-05-18 22:11:22 -0700731 snaplen = po->rx_ring.frame_size - macoff;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700732 if ((int)snaplen < 0)
733 snaplen = 0;
734 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700735
736 spin_lock(&sk->sk_receive_queue.lock);
Johann Baudy69e3c752009-05-18 22:11:22 -0700737 h.raw = packet_current_frame(po, &po->rx_ring, TP_STATUS_KERNEL);
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700738 if (!h.raw)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700739 goto ring_is_full;
Johann Baudy69e3c752009-05-18 22:11:22 -0700740 packet_increment_head(&po->rx_ring);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700741 po->stats.tp_packets++;
742 if (copy_skb) {
743 status |= TP_STATUS_COPY;
744 __skb_queue_tail(&sk->sk_receive_queue, copy_skb);
745 }
746 if (!po->stats.tp_drops)
747 status &= ~TP_STATUS_LOSING;
748 spin_unlock(&sk->sk_receive_queue.lock);
749
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700750 skb_copy_bits(skb, 0, h.raw + macoff, snaplen);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700751
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700752 switch (po->tp_version) {
753 case TPACKET_V1:
754 h.h1->tp_len = skb->len;
755 h.h1->tp_snaplen = snaplen;
756 h.h1->tp_mac = macoff;
757 h.h1->tp_net = netoff;
Scott McMillan614f60f2010-06-02 05:53:56 -0700758 if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
759 && shhwtstamps->syststamp.tv64)
760 tv = ktime_to_timeval(shhwtstamps->syststamp);
761 else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
762 && shhwtstamps->hwtstamp.tv64)
763 tv = ktime_to_timeval(shhwtstamps->hwtstamp);
764 else if (skb->tstamp.tv64)
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700765 tv = ktime_to_timeval(skb->tstamp);
766 else
767 do_gettimeofday(&tv);
768 h.h1->tp_sec = tv.tv_sec;
769 h.h1->tp_usec = tv.tv_usec;
770 hdrlen = sizeof(*h.h1);
771 break;
772 case TPACKET_V2:
773 h.h2->tp_len = skb->len;
774 h.h2->tp_snaplen = snaplen;
775 h.h2->tp_mac = macoff;
776 h.h2->tp_net = netoff;
Scott McMillan614f60f2010-06-02 05:53:56 -0700777 if ((po->tp_tstamp & SOF_TIMESTAMPING_SYS_HARDWARE)
778 && shhwtstamps->syststamp.tv64)
779 ts = ktime_to_timespec(shhwtstamps->syststamp);
780 else if ((po->tp_tstamp & SOF_TIMESTAMPING_RAW_HARDWARE)
781 && shhwtstamps->hwtstamp.tv64)
782 ts = ktime_to_timespec(shhwtstamps->hwtstamp);
783 else if (skb->tstamp.tv64)
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700784 ts = ktime_to_timespec(skb->tstamp);
785 else
786 getnstimeofday(&ts);
787 h.h2->tp_sec = ts.tv_sec;
788 h.h2->tp_nsec = ts.tv_nsec;
Eric Dumazet05423b22009-10-26 18:40:35 -0700789 h.h2->tp_vlan_tci = vlan_tx_tag_get(skb);
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700790 hdrlen = sizeof(*h.h2);
791 break;
792 default:
793 BUG();
794 }
Linus Torvalds1da177e2005-04-16 15:20:36 -0700795
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700796 sll = h.raw + TPACKET_ALIGN(hdrlen);
Stephen Hemmingerb95cce32007-09-26 22:13:38 -0700797 sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700798 sll->sll_family = AF_PACKET;
799 sll->sll_hatype = dev->type;
800 sll->sll_protocol = skb->protocol;
801 sll->sll_pkttype = skb->pkt_type;
Peter P Waskiewicz Jr8032b462007-11-10 22:03:25 -0800802 if (unlikely(po->origdev))
Peter P. Waskiewicz Jr80feaac2007-04-20 16:05:39 -0700803 sll->sll_ifindex = orig_dev->ifindex;
804 else
805 sll->sll_ifindex = dev->ifindex;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700806
Patrick McHardybbd6ef82008-07-14 22:50:15 -0700807 __packet_set_status(po, h.raw, status);
Ralf Baechlee16aa202006-12-07 00:11:33 -0800808 smp_mb();
Linus Torvalds1da177e2005-04-16 15:20:36 -0700809 {
Changli Gao0af55bb2010-12-01 02:52:20 +0000810 u8 *start, *end;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700811
Changli Gao0af55bb2010-12-01 02:52:20 +0000812 end = (u8 *)PAGE_ALIGN((unsigned long)h.raw + macoff + snaplen);
813 for (start = h.raw; start < end; start += PAGE_SIZE)
814 flush_dcache_page(pgv_to_page(start));
Linus Torvalds1da177e2005-04-16 15:20:36 -0700815 }
816
817 sk->sk_data_ready(sk, 0);
818
819drop_n_restore:
820 if (skb_head != skb->data && skb_shared(skb)) {
821 skb->data = skb_head;
822 skb->len = skb_len;
823 }
824drop:
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +0900825 kfree_skb(skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700826 return 0;
827
828ring_is_full:
829 po->stats.tp_drops++;
830 spin_unlock(&sk->sk_receive_queue.lock);
831
832 sk->sk_data_ready(sk, 0);
Wei Yongjunacb5d752009-02-25 00:36:42 +0000833 kfree_skb(copy_skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700834 goto drop_n_restore;
835}
836
/*
 * skb destructor for TX_RING frames.  Runs when the skb is finally
 * freed (i.e. the device has consumed or dropped it): the backing ring
 * frame is handed back to user space as TP_STATUS_AVAILABLE and the
 * ring's pending-transmission count is dropped.
 */
static void tpacket_destruct_skb(struct sk_buff *skb)
{
	struct packet_sock *po = pkt_sk(skb->sk);
	void *ph;

	BUG_ON(skb == NULL);

	if (likely(po->tx_ring.pg_vec)) {
		/* tpacket_fill_skb() stashed the ring-frame pointer here */
		ph = skb_shinfo(skb)->destructor_arg;
		BUG_ON(__packet_get_status(po, ph) != TP_STATUS_SENDING);
		BUG_ON(atomic_read(&po->tx_ring.pending) == 0);
		atomic_dec(&po->tx_ring.pending);
		/* Frame is now reusable by user space. */
		__packet_set_status(po, ph, TP_STATUS_AVAILABLE);
	}

	sock_wfree(skb);
}
854
/*
 * Build an skb for one TX_RING frame without copying the payload: the
 * skb's page fragments are pointed straight at the ring pages holding
 * the user's data.  For SOCK_DGRAM a link-layer header is generated
 * via dev_hard_header(); for SOCK_RAW the first hard_header_len bytes
 * of the frame are copied into the linear area (drivers dislike an
 * empty head).
 *
 * Returns the frame's payload length (tp_len) on success, or a
 * negative errno (-EMSGSIZE, -EINVAL, -EFAULT) on failure.
 */
static int tpacket_fill_skb(struct packet_sock *po, struct sk_buff *skb,
		void *frame, struct net_device *dev, int size_max,
		__be16 proto, unsigned char *addr)
{
	union {
		struct tpacket_hdr *h1;
		struct tpacket2_hdr *h2;
		void *raw;
	} ph;
	int to_write, offset, len, tp_len, nr_frags, len_max;
	struct socket *sock = po->sk.sk_socket;
	struct page *page;
	void *data;
	int err;

	ph.raw = frame;

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = po->sk.sk_priority;
	skb->mark = po->sk.sk_mark;
	/* Remember the ring frame so tpacket_destruct_skb() can release it. */
	skb_shinfo(skb)->destructor_arg = ph.raw;

	/* Frame length lives at a version-dependent offset in the header. */
	switch (po->tp_version) {
	case TPACKET_V2:
		tp_len = ph.h2->tp_len;
		break;
	default:
		tp_len = ph.h1->tp_len;
		break;
	}
	if (unlikely(tp_len > size_max)) {
		pr_err("packet size is too long (%d > %d)\n", tp_len, size_max);
		return -EMSGSIZE;
	}

	skb_reserve(skb, LL_RESERVED_SPACE(dev));
	skb_reset_network_header(skb);

	/* User payload starts right after the tpacket header + address. */
	data = ph.raw + po->tp_hdrlen - sizeof(struct sockaddr_ll);
	to_write = tp_len;

	if (sock->type == SOCK_DGRAM) {
		err = dev_hard_header(skb, dev, ntohs(proto), addr,
				NULL, tp_len);
		if (unlikely(err < 0))
			return -EINVAL;
	} else if (dev->hard_header_len) {
		/* net device doesn't like empty head */
		if (unlikely(tp_len <= dev->hard_header_len)) {
			pr_err("packet size is too short (%d < %d)\n",
			       tp_len, dev->hard_header_len);
			return -EINVAL;
		}

		/* Copy the link-layer header into the linear area ... */
		skb_push(skb, dev->hard_header_len);
		err = skb_store_bits(skb, 0, data,
				dev->hard_header_len);
		if (unlikely(err))
			return err;

		/* ... and attach only the rest as page fragments. */
		data += dev->hard_header_len;
		to_write -= dev->hard_header_len;
	}

	err = -EFAULT;
	offset = offset_in_page(data);
	len_max = PAGE_SIZE - offset;
	len = ((to_write > len_max) ? len_max : to_write);

	/* Account for the whole (not yet attached) paged payload up front. */
	skb->data_len = to_write;
	skb->len += to_write;
	skb->truesize += to_write;
	atomic_add(to_write, &po->sk.sk_wmem_alloc);

	/* Attach the payload page by page, splitting at page boundaries. */
	while (likely(to_write)) {
		nr_frags = skb_shinfo(skb)->nr_frags;

		if (unlikely(nr_frags >= MAX_SKB_FRAGS)) {
			pr_err("Packet exceed the number of skb frags(%lu)\n",
			       MAX_SKB_FRAGS);
			return -EFAULT;
		}

		page = pgv_to_page(data);
		data += len;
		flush_dcache_page(page);
		get_page(page);	/* frag holds its own page reference */
		skb_fill_page_desc(skb, nr_frags, page, offset, len);
		to_write -= len;
		offset = 0;
		len_max = PAGE_SIZE;
		len = ((to_write > len_max) ? len_max : to_write);
	}

	return tp_len;
}
952
/*
 * Transmit loop for a mapped TX_RING: walk the ring under pg_vec_lock,
 * turn each frame marked TP_STATUS_SEND_REQUEST into a zero-copy skb
 * (tpacket_fill_skb) and hand it to the device.  The destination comes
 * either from msg_name or from the socket's bound ifindex/protocol.
 *
 * Returns the total number of payload bytes queued, or a negative
 * errno from the first failure.
 */
static int tpacket_snd(struct packet_sock *po, struct msghdr *msg)
{
	struct socket *sock;
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto;
	int ifindex, err, reserve = 0;
	void *ph;
	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
	int tp_len, size_max;
	unsigned char *addr;
	int len_sum = 0;
	int status = 0;

	sock = po->sk.sk_socket;

	/* Serialize against ring (re)configuration and other senders. */
	mutex_lock(&po->pg_vec_lock);

	err = -EBUSY;
	if (saddr == NULL) {
		/* No explicit address: use the socket's bound binding. */
		ifindex	= po->ifindex;
		proto	= po->num;
		addr	= NULL;
	} else {
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		if (msg->msg_namelen < (saddr->sll_halen
					+ offsetof(struct sockaddr_ll,
						sll_addr)))
			goto out;
		ifindex	= saddr->sll_ifindex;
		proto	= saddr->sll_protocol;
		addr	= saddr->sll_addr;
	}

	dev = dev_get_by_index(sock_net(&po->sk), ifindex);
	err = -ENXIO;
	if (unlikely(dev == NULL))
		goto out;

	reserve = dev->hard_header_len;

	err = -ENETDOWN;
	if (unlikely(!(dev->flags & IFF_UP)))
		goto out_put;

	/* Largest payload a ring frame can carry, capped by the MTU. */
	size_max = po->tx_ring.frame_size
		- (po->tp_hdrlen - sizeof(struct sockaddr_ll));

	if (size_max > dev->mtu + reserve)
		size_max = dev->mtu + reserve;

	do {
		ph = packet_current_frame(po, &po->tx_ring,
				TP_STATUS_SEND_REQUEST);

		if (unlikely(ph == NULL)) {
			/* NOTE(review): no frame ready — just yield and
			 * re-poll; there is no wait queue here. */
			schedule();
			continue;
		}

		status = TP_STATUS_SEND_REQUEST;
		/* Linear area only needs room for headers; payload is
		 * attached as ring-page fragments. */
		skb = sock_alloc_send_skb(&po->sk,
				LL_ALLOCATED_SPACE(dev)
				+ sizeof(struct sockaddr_ll),
				0, &err);

		if (unlikely(skb == NULL))
			goto out_status;

		tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
				addr);

		if (unlikely(tp_len < 0)) {
			if (po->tp_loss) {
				/* PACKET_LOSS set: silently discard the
				 * malformed frame and keep going. */
				__packet_set_status(po, ph,
						TP_STATUS_AVAILABLE);
				packet_increment_head(&po->tx_ring);
				kfree_skb(skb);
				continue;
			} else {
				status = TP_STATUS_WRONG_FORMAT;
				err = tp_len;
				goto out_status;
			}
		}

		skb->destructor = tpacket_destruct_skb;
		__packet_set_status(po, ph, TP_STATUS_SENDING);
		atomic_inc(&po->tx_ring.pending);

		status = TP_STATUS_SEND_REQUEST;
		err = dev_queue_xmit(skb);
		if (unlikely(err > 0)) {
			err = net_xmit_errno(err);
			if (err && __packet_get_status(po, ph) ==
				   TP_STATUS_AVAILABLE) {
				/* skb was destructed already */
				skb = NULL;
				goto out_status;
			}
			/*
			 * skb was dropped but not destructed yet;
			 * let's treat it like congestion or err < 0
			 */
			err = 0;
		}
		packet_increment_head(&po->tx_ring);
		len_sum += tp_len;
		/* Keep looping while frames remain, or (blocking send)
		 * until everything already queued has been completed. */
	} while (likely((ph != NULL) ||
			((!(msg->msg_flags & MSG_DONTWAIT)) &&
			 (atomic_read(&po->tx_ring.pending))))
		);

	err = len_sum;
	goto out_put;

out_status:
	__packet_set_status(po, ph, status);
	kfree_skb(skb);
out_put:
	dev_put(dev);
out:
	mutex_unlock(&po->pg_vec_lock);
	return err;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001080
Sridhar Samudralabfd5f4a2010-02-04 20:24:10 -08001081static inline struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad,
1082 size_t reserve, size_t len,
1083 size_t linear, int noblock,
1084 int *err)
1085{
1086 struct sk_buff *skb;
1087
1088 /* Under a page? Don't bother with paged skb. */
1089 if (prepad + len < PAGE_SIZE || !linear)
1090 linear = len;
1091
1092 skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock,
1093 err);
1094 if (!skb)
1095 return NULL;
1096
1097 skb_reserve(skb, reserve);
1098 skb_put(skb, linear);
1099 skb->data_len = len - linear;
1100 skb->len += len - linear;
1101
1102 return skb;
1103}
1104
/*
 * Copying (non-ring) transmit path.  Resolves the destination device
 * from msg_name or the socket binding, optionally parses a leading
 * struct virtio_net_hdr (PACKET_VNET_HDR sockets) to set up checksum
 * offload and GSO metadata, copies the payload out of the iovec, and
 * queues the skb on the device.
 *
 * Returns the number of bytes sent (including the virtio header, if
 * any) or a negative errno.
 */
static int packet_snd(struct socket *sock,
		      struct msghdr *msg, size_t len)
{
	struct sock *sk = sock->sk;
	struct sockaddr_ll *saddr = (struct sockaddr_ll *)msg->msg_name;
	struct sk_buff *skb;
	struct net_device *dev;
	__be16 proto;
	unsigned char *addr;
	int ifindex, err, reserve = 0;
	struct virtio_net_hdr vnet_hdr = { 0 };
	int offset = 0;
	int vnet_hdr_len;
	struct packet_sock *po = pkt_sk(sk);
	unsigned short gso_type = 0;

	/*
	 *	Get and verify the address.
	 */

	if (saddr == NULL) {
		ifindex	= po->ifindex;
		proto	= po->num;
		addr	= NULL;
	} else {
		err = -EINVAL;
		if (msg->msg_namelen < sizeof(struct sockaddr_ll))
			goto out;
		/* sll_halen hardware-address bytes must fit in msg_name. */
		if (msg->msg_namelen < (saddr->sll_halen + offsetof(struct sockaddr_ll, sll_addr)))
			goto out;
		ifindex	= saddr->sll_ifindex;
		proto	= saddr->sll_protocol;
		addr	= saddr->sll_addr;
	}


	dev = dev_get_by_index(sock_net(sk), ifindex);
	err = -ENXIO;
	if (dev == NULL)
		goto out_unlock;
	/* SOCK_RAW payload already contains the link-layer header. */
	if (sock->type == SOCK_RAW)
		reserve = dev->hard_header_len;

	err = -ENETDOWN;
	if (!(dev->flags & IFF_UP))
		goto out_unlock;

	if (po->has_vnet_hdr) {
		/* PACKET_VNET_HDR: message starts with a virtio_net_hdr. */
		vnet_hdr_len = sizeof(vnet_hdr);

		err = -EINVAL;
		if (len < vnet_hdr_len)
			goto out_unlock;

		len -= vnet_hdr_len;

		err = memcpy_fromiovec((void *)&vnet_hdr, msg->msg_iov,
				       vnet_hdr_len);
		if (err < 0)
			goto out_unlock;

		/* hdr_len must at least cover the checksum fields. */
		if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) &&
		    (vnet_hdr.csum_start + vnet_hdr.csum_offset + 2 >
		      vnet_hdr.hdr_len))
			vnet_hdr.hdr_len = vnet_hdr.csum_start +
						 vnet_hdr.csum_offset + 2;

		err = -EINVAL;
		if (vnet_hdr.hdr_len > len)
			goto out_unlock;

		if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) {
			/* Map virtio GSO types onto skb GSO types. */
			switch (vnet_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
			case VIRTIO_NET_HDR_GSO_TCPV4:
				gso_type = SKB_GSO_TCPV4;
				break;
			case VIRTIO_NET_HDR_GSO_TCPV6:
				gso_type = SKB_GSO_TCPV6;
				break;
			case VIRTIO_NET_HDR_GSO_UDP:
				gso_type = SKB_GSO_UDP;
				break;
			default:
				goto out_unlock;
			}

			if (vnet_hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN)
				gso_type |= SKB_GSO_TCP_ECN;

			if (vnet_hdr.gso_size == 0)
				goto out_unlock;

		}
	}

	/* Non-GSO packets must fit in the MTU (plus raw header room). */
	err = -EMSGSIZE;
	if (!gso_type && (len > dev->mtu+reserve))
		goto out_unlock;

	err = -ENOBUFS;
	skb = packet_alloc_skb(sk, LL_ALLOCATED_SPACE(dev),
			       LL_RESERVED_SPACE(dev), len, vnet_hdr.hdr_len,
			       msg->msg_flags & MSG_DONTWAIT, &err);
	if (skb == NULL)
		goto out_unlock;

	skb_set_network_header(skb, reserve);

	err = -EINVAL;
	if (sock->type == SOCK_DGRAM &&
	    (offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len)) < 0)
		goto out_free;

	/* Returns -EFAULT on error */
	err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len);
	if (err)
		goto out_free;
	err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags);
	if (err < 0)
		goto out_free;

	skb->protocol = proto;
	skb->dev = dev;
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	if (po->has_vnet_hdr) {
		if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) {
			if (!skb_partial_csum_set(skb, vnet_hdr.csum_start,
						  vnet_hdr.csum_offset)) {
				err = -EINVAL;
				goto out_free;
			}
		}

		skb_shinfo(skb)->gso_size = vnet_hdr.gso_size;
		skb_shinfo(skb)->gso_type = gso_type;

		/* Header must be checked, and gso_segs computed. */
		skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY;
		skb_shinfo(skb)->gso_segs = 0;

		/* Report the header bytes as sent, too. */
		len += vnet_hdr_len;
	}

	/*
	 *	Now send it
	 */

	err = dev_queue_xmit(skb);
	if (err > 0 && (err = net_xmit_errno(err)) != 0)
		goto out_unlock;

	dev_put(dev);

	return len;

out_free:
	kfree_skb(skb);
out_unlock:
	if (dev)
		dev_put(dev);
out:
	return err;
}
1270
Johann Baudy69e3c752009-05-18 22:11:22 -07001271static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
1272 struct msghdr *msg, size_t len)
1273{
Johann Baudy69e3c752009-05-18 22:11:22 -07001274 struct sock *sk = sock->sk;
1275 struct packet_sock *po = pkt_sk(sk);
1276 if (po->tx_ring.pg_vec)
1277 return tpacket_snd(po, msg);
1278 else
Johann Baudy69e3c752009-05-18 22:11:22 -07001279 return packet_snd(sock, msg, len);
1280}
1281
Linus Torvalds1da177e2005-04-16 15:20:36 -07001282/*
1283 * Close a PACKET socket. This is fairly simple. We immediately go
1284 * to 'closed' state and remove our protocol entry in the device list.
1285 */
1286
/*
 * Release (close) a PACKET socket: unhash it from the per-net socket
 * list, detach the protocol hook, flush multicast memberships, free
 * any mapped RX/TX rings, then orphan and drop the socket.
 */
static int packet_release(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po;
	struct net *net;
	struct tpacket_req req;

	if (!sk)
		return 0;

	net = sock_net(sk);
	po = pkt_sk(sk);

	/* Unhash from the namespace socket list (RCU readers may still
	 * see the socket until synchronize_net() below). */
	spin_lock_bh(&net->packet.sklist_lock);
	sk_del_node_init_rcu(sk);
	sock_prot_inuse_add(net, sk->sk_prot, -1);
	spin_unlock_bh(&net->packet.sklist_lock);

	spin_lock(&po->bind_lock);
	if (po->running) {
		/*
		 *	Remove from protocol table
		 */
		po->running = 0;
		po->num = 0;
		__dev_remove_pack(&po->prot_hook);
		__sock_put(sk);	/* drop the ref held by the hook */
	}
	spin_unlock(&po->bind_lock);

	packet_flush_mclist(sk);

	/* A zeroed tpacket_req tells packet_set_ring() to tear down. */
	memset(&req, 0, sizeof(req));

	if (po->rx_ring.pg_vec)
		packet_set_ring(sk, &req, 1, 0);

	if (po->tx_ring.pg_vec)
		packet_set_ring(sk, &req, 1, 1);

	synchronize_net();
	/*
	 * Now the socket is dead. No more input will appear.
	 */
	sock_orphan(sk);
	sock->sk = NULL;

	/* Purge queues */

	skb_queue_purge(&sk->sk_receive_queue);
	sk_refcnt_debug_release(sk);

	sock_put(sk);
	return 0;
}
1342
1343/*
1344 * Attach a packet hook.
1345 */
1346
/*
 * Attach a packet hook: (re)bind the socket to a device/protocol pair.
 * A protocol of 0 leaves the socket unbound (hook detached).  If the
 * target device is down, the bind is recorded but the hook is not
 * registered and ENETDOWN is signalled on the socket.  Always returns 0.
 */
static int packet_do_bind(struct sock *sk, struct net_device *dev, __be16 protocol)
{
	struct packet_sock *po = pkt_sk(sk);
	/*
	 *	Detach an existing hook if present.
	 */

	lock_sock(sk);

	spin_lock(&po->bind_lock);
	if (po->running) {
		__sock_put(sk);
		po->running = 0;
		po->num = 0;
		/* dev_remove_pack() can sleep/synchronize, so the
		 * spinlock must be dropped around it. */
		spin_unlock(&po->bind_lock);
		dev_remove_pack(&po->prot_hook);
		spin_lock(&po->bind_lock);
	}

	po->num = protocol;
	po->prot_hook.type = protocol;
	po->prot_hook.dev = dev;

	po->ifindex = dev ? dev->ifindex : 0;

	if (protocol == 0)
		goto out_unlock;

	if (!dev || (dev->flags & IFF_UP)) {
		dev_add_pack(&po->prot_hook);
		sock_hold(sk);	/* the registered hook holds a ref */
		po->running = 1;
	} else {
		/* Device exists but is down: report ENETDOWN, leave the
		 * binding recorded for when it comes back up. */
		sk->sk_err = ENETDOWN;
		if (!sock_flag(sk, SOCK_DEAD))
			sk->sk_error_report(sk);
	}

out_unlock:
	spin_unlock(&po->bind_lock);
	release_sock(sk);
	return 0;
}
1390
1391/*
1392 * Bind a packet socket to a device
1393 */
1394
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001395static int packet_bind_spkt(struct socket *sock, struct sockaddr *uaddr,
1396 int addr_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001397{
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001398 struct sock *sk = sock->sk;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001399 char name[15];
1400 struct net_device *dev;
1401 int err = -ENODEV;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001402
Linus Torvalds1da177e2005-04-16 15:20:36 -07001403 /*
1404 * Check legality
1405 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001406
Kris Katterjohn8ae55f02006-01-23 16:28:02 -08001407 if (addr_len != sizeof(struct sockaddr))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001408 return -EINVAL;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001409 strlcpy(name, uaddr->sa_data, sizeof(name));
Linus Torvalds1da177e2005-04-16 15:20:36 -07001410
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001411 dev = dev_get_by_name(sock_net(sk), name);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001412 if (dev) {
1413 err = packet_do_bind(sk, dev, pkt_sk(sk)->num);
1414 dev_put(dev);
1415 }
1416 return err;
1417}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001418
1419static int packet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1420{
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001421 struct sockaddr_ll *sll = (struct sockaddr_ll *)uaddr;
1422 struct sock *sk = sock->sk;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001423 struct net_device *dev = NULL;
1424 int err;
1425
1426
1427 /*
1428 * Check legality
1429 */
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001430
Linus Torvalds1da177e2005-04-16 15:20:36 -07001431 if (addr_len < sizeof(struct sockaddr_ll))
1432 return -EINVAL;
1433 if (sll->sll_family != AF_PACKET)
1434 return -EINVAL;
1435
1436 if (sll->sll_ifindex) {
1437 err = -ENODEV;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001438 dev = dev_get_by_index(sock_net(sk), sll->sll_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001439 if (dev == NULL)
1440 goto out;
1441 }
1442 err = packet_do_bind(sk, dev, sll->sll_protocol ? : pkt_sk(sk)->num);
1443 if (dev)
1444 dev_put(dev);
1445
1446out:
1447 return err;
1448}
1449
/* Protocol descriptor registered with the socket core; one
 * struct packet_sock is allocated per PF_PACKET socket. */
static struct proto packet_proto = {
	.name	  = "PACKET",
	.owner	  = THIS_MODULE,
	.obj_size = sizeof(struct packet_sock),
};
1455
1456/*
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001457 * Create a packet of type SOCK_PACKET.
Linus Torvalds1da177e2005-04-16 15:20:36 -07001458 */
1459
/*
 * Create a PF_PACKET socket (SOCK_RAW, SOCK_DGRAM or SOCK_PACKET).
 * Requires CAP_NET_RAW.  If a non-zero protocol is given the receive
 * hook is registered immediately; otherwise the socket stays unbound
 * until bind().  Returns 0 or a negative errno.
 */
static int packet_create(struct net *net, struct socket *sock, int protocol,
			 int kern)
{
	struct sock *sk;
	struct packet_sock *po;
	__be16 proto = (__force __be16)protocol; /* weird, but documented */
	int err;

	if (!capable(CAP_NET_RAW))
		return -EPERM;
	if (sock->type != SOCK_DGRAM && sock->type != SOCK_RAW &&
	    sock->type != SOCK_PACKET)
		return -ESOCKTNOSUPPORT;

	sock->state = SS_UNCONNECTED;

	err = -ENOBUFS;
	sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
	if (sk == NULL)
		goto out;

	/* Legacy SOCK_PACKET sockets get their own ops table. */
	sock->ops = &packet_ops;
	if (sock->type == SOCK_PACKET)
		sock->ops = &packet_ops_spkt;

	sock_init_data(sock, sk);

	po = pkt_sk(sk);
	sk->sk_family = PF_PACKET;
	po->num = proto;

	sk->sk_destruct = packet_sock_destruct;
	sk_refcnt_debug_inc(sk);

	/*
	 *	Attach a protocol block
	 */

	spin_lock_init(&po->bind_lock);
	mutex_init(&po->pg_vec_lock);
	po->prot_hook.func = packet_rcv;

	if (sock->type == SOCK_PACKET)
		po->prot_hook.func = packet_rcv_spkt;

	po->prot_hook.af_packet_priv = sk;

	if (proto) {
		po->prot_hook.type = proto;
		dev_add_pack(&po->prot_hook);
		sock_hold(sk);	/* ref held by the registered hook */
		po->running = 1;
	}

	/* Make the socket visible on the per-namespace list. */
	spin_lock_bh(&net->packet.sklist_lock);
	sk_add_node_rcu(sk, &net->packet.sklist);
	sock_prot_inuse_add(net, &packet_proto, 1);
	spin_unlock_bh(&net->packet.sklist_lock);

	return 0;
out:
	return err;
}
1523
/*
 * Service a recvmsg(MSG_ERRQUEUE) request: dequeue one skb from the
 * socket error queue (used here to deliver TX timestamps), copy its
 * payload and the extended error cmsg to the user, then refresh
 * sk_err from whatever remains queued.
 *
 * Returns the number of bytes copied, or -EAGAIN when the queue is
 * empty.
 */
static int packet_recv_error(struct sock *sk, struct msghdr *msg, int len)
{
	struct sock_exterr_skb *serr;
	struct sk_buff *skb, *skb2;
	int copied, err;

	err = -EAGAIN;
	skb = skb_dequeue(&sk->sk_error_queue);
	if (skb == NULL)
		goto out;

	copied = skb->len;
	if (copied > len) {
		/* User buffer too small: truncate and flag it. */
		msg->msg_flags |= MSG_TRUNC;
		copied = len;
	}
	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
	if (err)
		goto out_free_skb;

	sock_recv_timestamp(msg, sk, skb);

	serr = SKB_EXT_ERR(skb);
	put_cmsg(msg, SOL_PACKET, PACKET_TX_TIMESTAMP,
		 sizeof(serr->ee), &serr->ee);

	msg->msg_flags |= MSG_ERRQUEUE;
	err = copied;

	/* Reset and regenerate socket error */
	spin_lock_bh(&sk->sk_error_queue.lock);
	sk->sk_err = 0;
	if ((skb2 = skb_peek(&sk->sk_error_queue)) != NULL) {
		/* More errors pending: re-arm sk_err and notify
		 * (error_report must run outside the queue lock). */
		sk->sk_err = SKB_EXT_ERR(skb2)->ee.ee_errno;
		spin_unlock_bh(&sk->sk_error_queue.lock);
		sk->sk_error_report(sk);
	} else
		spin_unlock_bh(&sk->sk_error_queue.lock);

out_free_skb:
	kfree_skb(skb);
out:
	return err;
}
1568
Linus Torvalds1da177e2005-04-16 15:20:36 -07001569/*
1570 * Pull a packet from our receive queue and hand it to the user.
1571 * If necessary we block.
1572 */
1573
1574static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
1575 struct msghdr *msg, size_t len, int flags)
1576{
1577 struct sock *sk = sock->sk;
1578 struct sk_buff *skb;
1579 int copied, err;
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001580 struct sockaddr_ll *sll;
Sridhar Samudralabfd5f4a2010-02-04 20:24:10 -08001581 int vnet_hdr_len = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001582
1583 err = -EINVAL;
Richard Cochraned85b562010-04-07 22:41:28 +00001584 if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001585 goto out;
1586
1587#if 0
1588 /* What error should we return now? EUNATTACH? */
1589 if (pkt_sk(sk)->ifindex < 0)
1590 return -ENODEV;
1591#endif
1592
Richard Cochraned85b562010-04-07 22:41:28 +00001593 if (flags & MSG_ERRQUEUE) {
1594 err = packet_recv_error(sk, msg, len);
1595 goto out;
1596 }
1597
Linus Torvalds1da177e2005-04-16 15:20:36 -07001598 /*
Linus Torvalds1da177e2005-04-16 15:20:36 -07001599 * Call the generic datagram receiver. This handles all sorts
1600 * of horrible races and re-entrancy so we can forget about it
1601 * in the protocol layers.
1602 *
1603 * Now it will return ENETDOWN, if device have just gone down,
1604 * but then it will block.
1605 */
1606
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001607 skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001608
1609 /*
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001610 * An error occurred so return it. Because skb_recv_datagram()
Linus Torvalds1da177e2005-04-16 15:20:36 -07001611 * handles the blocking we don't see and worry about blocking
1612 * retries.
1613 */
1614
Kris Katterjohn8ae55f02006-01-23 16:28:02 -08001615 if (skb == NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001616 goto out;
1617
Sridhar Samudralabfd5f4a2010-02-04 20:24:10 -08001618 if (pkt_sk(sk)->has_vnet_hdr) {
1619 struct virtio_net_hdr vnet_hdr = { 0 };
1620
1621 err = -EINVAL;
1622 vnet_hdr_len = sizeof(vnet_hdr);
Mariusz Kozlowski1f18b712010-11-08 11:58:45 +00001623 if (len < vnet_hdr_len)
Sridhar Samudralabfd5f4a2010-02-04 20:24:10 -08001624 goto out_free;
1625
Mariusz Kozlowski1f18b712010-11-08 11:58:45 +00001626 len -= vnet_hdr_len;
1627
Sridhar Samudralabfd5f4a2010-02-04 20:24:10 -08001628 if (skb_is_gso(skb)) {
1629 struct skb_shared_info *sinfo = skb_shinfo(skb);
1630
1631 /* This is a hint as to how much should be linear. */
1632 vnet_hdr.hdr_len = skb_headlen(skb);
1633 vnet_hdr.gso_size = sinfo->gso_size;
1634 if (sinfo->gso_type & SKB_GSO_TCPV4)
1635 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1636 else if (sinfo->gso_type & SKB_GSO_TCPV6)
1637 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6;
1638 else if (sinfo->gso_type & SKB_GSO_UDP)
1639 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP;
1640 else if (sinfo->gso_type & SKB_GSO_FCOE)
1641 goto out_free;
1642 else
1643 BUG();
1644 if (sinfo->gso_type & SKB_GSO_TCP_ECN)
1645 vnet_hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN;
1646 } else
1647 vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE;
1648
1649 if (skb->ip_summed == CHECKSUM_PARTIAL) {
1650 vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
1651 vnet_hdr.csum_start = skb->csum_start -
1652 skb_headroom(skb);
1653 vnet_hdr.csum_offset = skb->csum_offset;
1654 } /* else everything is zero */
1655
1656 err = memcpy_toiovec(msg->msg_iov, (void *)&vnet_hdr,
1657 vnet_hdr_len);
1658 if (err < 0)
1659 goto out_free;
1660 }
1661
Linus Torvalds1da177e2005-04-16 15:20:36 -07001662 /*
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001663 * If the address length field is there to be filled in, we fill
1664 * it in now.
1665 */
1666
Herbert Xuffbc6112007-02-04 23:33:10 -08001667 sll = &PACKET_SKB_CB(skb)->sa.ll;
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001668 if (sock->type == SOCK_PACKET)
1669 msg->msg_namelen = sizeof(struct sockaddr_pkt);
1670 else
1671 msg->msg_namelen = sll->sll_halen + offsetof(struct sockaddr_ll, sll_addr);
1672
1673 /*
Linus Torvalds1da177e2005-04-16 15:20:36 -07001674 * You lose any data beyond the buffer you gave. If it worries a
1675 * user program they can ask the device for its MTU anyway.
1676 */
1677
1678 copied = skb->len;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001679 if (copied > len) {
1680 copied = len;
1681 msg->msg_flags |= MSG_TRUNC;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001682 }
1683
1684 err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
1685 if (err)
1686 goto out_free;
1687
Neil Horman3b885782009-10-12 13:26:31 -07001688 sock_recv_ts_and_drops(msg, sk, skb);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001689
1690 if (msg->msg_name)
Herbert Xuffbc6112007-02-04 23:33:10 -08001691 memcpy(msg->msg_name, &PACKET_SKB_CB(skb)->sa,
1692 msg->msg_namelen);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001693
Herbert Xu8dc41942007-02-04 23:31:32 -08001694 if (pkt_sk(sk)->auxdata) {
Herbert Xuffbc6112007-02-04 23:33:10 -08001695 struct tpacket_auxdata aux;
1696
1697 aux.tp_status = TP_STATUS_USER;
1698 if (skb->ip_summed == CHECKSUM_PARTIAL)
1699 aux.tp_status |= TP_STATUS_CSUMNOTREADY;
1700 aux.tp_len = PACKET_SKB_CB(skb)->origlen;
1701 aux.tp_snaplen = skb->len;
1702 aux.tp_mac = 0;
Arnaldo Carvalho de Melobbe735e2007-03-10 22:16:10 -03001703 aux.tp_net = skb_network_offset(skb);
Eric Dumazet05423b22009-10-26 18:40:35 -07001704 aux.tp_vlan_tci = vlan_tx_tag_get(skb);
Herbert Xuffbc6112007-02-04 23:33:10 -08001705
1706 put_cmsg(msg, SOL_PACKET, PACKET_AUXDATA, sizeof(aux), &aux);
Herbert Xu8dc41942007-02-04 23:31:32 -08001707 }
1708
Linus Torvalds1da177e2005-04-16 15:20:36 -07001709 /*
1710 * Free or return the buffer as appropriate. Again this
1711 * hides all the races and re-entrancy issues from us.
1712 */
Sridhar Samudralabfd5f4a2010-02-04 20:24:10 -08001713 err = vnet_hdr_len + ((flags&MSG_TRUNC) ? skb->len : copied);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001714
1715out_free:
1716 skb_free_datagram(sk, skb);
1717out:
1718 return err;
1719}
1720
Linus Torvalds1da177e2005-04-16 15:20:36 -07001721static int packet_getname_spkt(struct socket *sock, struct sockaddr *uaddr,
1722 int *uaddr_len, int peer)
1723{
1724 struct net_device *dev;
1725 struct sock *sk = sock->sk;
1726
1727 if (peer)
1728 return -EOPNOTSUPP;
1729
1730 uaddr->sa_family = AF_PACKET;
Eric Dumazet654d1f82009-11-02 10:43:32 +01001731 rcu_read_lock();
1732 dev = dev_get_by_index_rcu(sock_net(sk), pkt_sk(sk)->ifindex);
1733 if (dev)
Vasiliy Kulikov67286642010-11-10 12:09:10 -08001734 strncpy(uaddr->sa_data, dev->name, 14);
Eric Dumazet654d1f82009-11-02 10:43:32 +01001735 else
Linus Torvalds1da177e2005-04-16 15:20:36 -07001736 memset(uaddr->sa_data, 0, 14);
Eric Dumazet654d1f82009-11-02 10:43:32 +01001737 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07001738 *uaddr_len = sizeof(*uaddr);
1739
1740 return 0;
1741}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001742
1743static int packet_getname(struct socket *sock, struct sockaddr *uaddr,
1744 int *uaddr_len, int peer)
1745{
1746 struct net_device *dev;
1747 struct sock *sk = sock->sk;
1748 struct packet_sock *po = pkt_sk(sk);
Cyrill Gorcunov13cfa972009-11-08 05:51:19 +00001749 DECLARE_SOCKADDR(struct sockaddr_ll *, sll, uaddr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001750
1751 if (peer)
1752 return -EOPNOTSUPP;
1753
1754 sll->sll_family = AF_PACKET;
1755 sll->sll_ifindex = po->ifindex;
1756 sll->sll_protocol = po->num;
Vasiliy Kulikov67286642010-11-10 12:09:10 -08001757 sll->sll_pkttype = 0;
Eric Dumazet654d1f82009-11-02 10:43:32 +01001758 rcu_read_lock();
1759 dev = dev_get_by_index_rcu(sock_net(sk), po->ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001760 if (dev) {
1761 sll->sll_hatype = dev->type;
1762 sll->sll_halen = dev->addr_len;
1763 memcpy(sll->sll_addr, dev->dev_addr, dev->addr_len);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001764 } else {
1765 sll->sll_hatype = 0; /* Bad: we have no ARPHRD_UNSPEC */
1766 sll->sll_halen = 0;
1767 }
Eric Dumazet654d1f82009-11-02 10:43:32 +01001768 rcu_read_unlock();
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001769 *uaddr_len = offsetof(struct sockaddr_ll, sll_addr) + sll->sll_halen;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001770
1771 return 0;
1772}
1773
Wang Chen2aeb0b82008-07-14 20:49:46 -07001774static int packet_dev_mc(struct net_device *dev, struct packet_mclist *i,
1775 int what)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001776{
1777 switch (i->type) {
1778 case PACKET_MR_MULTICAST:
Jiri Pirko11625632010-03-02 20:40:01 +00001779 if (i->alen != dev->addr_len)
1780 return -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001781 if (what > 0)
Jiri Pirko22bedad32010-04-01 21:22:57 +00001782 return dev_mc_add(dev, i->addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001783 else
Jiri Pirko22bedad32010-04-01 21:22:57 +00001784 return dev_mc_del(dev, i->addr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001785 break;
1786 case PACKET_MR_PROMISC:
Wang Chen2aeb0b82008-07-14 20:49:46 -07001787 return dev_set_promiscuity(dev, what);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001788 break;
1789 case PACKET_MR_ALLMULTI:
Wang Chen2aeb0b82008-07-14 20:49:46 -07001790 return dev_set_allmulti(dev, what);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001791 break;
Eric W. Biedermand95ed922009-05-19 18:27:17 +00001792 case PACKET_MR_UNICAST:
Jiri Pirko11625632010-03-02 20:40:01 +00001793 if (i->alen != dev->addr_len)
1794 return -EINVAL;
Eric W. Biedermand95ed922009-05-19 18:27:17 +00001795 if (what > 0)
Jiri Pirkoa748ee22010-04-01 21:22:09 +00001796 return dev_uc_add(dev, i->addr);
Eric W. Biedermand95ed922009-05-19 18:27:17 +00001797 else
Jiri Pirkoa748ee22010-04-01 21:22:09 +00001798 return dev_uc_del(dev, i->addr);
Eric W. Biedermand95ed922009-05-19 18:27:17 +00001799 break;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001800 default:
1801 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001802 }
Wang Chen2aeb0b82008-07-14 20:49:46 -07001803 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001804}
1805
1806static void packet_dev_mclist(struct net_device *dev, struct packet_mclist *i, int what)
1807{
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001808 for ( ; i; i = i->next) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07001809 if (i->ifindex == dev->ifindex)
1810 packet_dev_mc(dev, i, what);
1811 }
1812}
1813
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001814static int packet_mc_add(struct sock *sk, struct packet_mreq_max *mreq)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001815{
1816 struct packet_sock *po = pkt_sk(sk);
1817 struct packet_mclist *ml, *i;
1818 struct net_device *dev;
1819 int err;
1820
1821 rtnl_lock();
1822
1823 err = -ENODEV;
YOSHIFUJI Hideaki3b1e0a62008-03-26 02:26:21 +09001824 dev = __dev_get_by_index(sock_net(sk), mreq->mr_ifindex);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001825 if (!dev)
1826 goto done;
1827
1828 err = -EINVAL;
Jiri Pirko11625632010-03-02 20:40:01 +00001829 if (mreq->mr_alen > dev->addr_len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001830 goto done;
1831
1832 err = -ENOBUFS;
Kris Katterjohn8b3a7002006-01-11 15:56:43 -08001833 i = kmalloc(sizeof(*i), GFP_KERNEL);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001834 if (i == NULL)
1835 goto done;
1836
1837 err = 0;
1838 for (ml = po->mclist; ml; ml = ml->next) {
1839 if (ml->ifindex == mreq->mr_ifindex &&
1840 ml->type == mreq->mr_type &&
1841 ml->alen == mreq->mr_alen &&
1842 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1843 ml->count++;
1844 /* Free the new element ... */
1845 kfree(i);
1846 goto done;
1847 }
1848 }
1849
1850 i->type = mreq->mr_type;
1851 i->ifindex = mreq->mr_ifindex;
1852 i->alen = mreq->mr_alen;
1853 memcpy(i->addr, mreq->mr_address, i->alen);
1854 i->count = 1;
1855 i->next = po->mclist;
1856 po->mclist = i;
Wang Chen2aeb0b82008-07-14 20:49:46 -07001857 err = packet_dev_mc(dev, i, 1);
1858 if (err) {
1859 po->mclist = i->next;
1860 kfree(i);
1861 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07001862
1863done:
1864 rtnl_unlock();
1865 return err;
1866}
1867
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001868static int packet_mc_drop(struct sock *sk, struct packet_mreq_max *mreq)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001869{
1870 struct packet_mclist *ml, **mlp;
1871
1872 rtnl_lock();
1873
1874 for (mlp = &pkt_sk(sk)->mclist; (ml = *mlp) != NULL; mlp = &ml->next) {
1875 if (ml->ifindex == mreq->mr_ifindex &&
1876 ml->type == mreq->mr_type &&
1877 ml->alen == mreq->mr_alen &&
1878 memcmp(ml->addr, mreq->mr_address, ml->alen) == 0) {
1879 if (--ml->count == 0) {
1880 struct net_device *dev;
1881 *mlp = ml->next;
Eric Dumazetad959e72009-10-16 06:38:46 +00001882 dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
1883 if (dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001884 packet_dev_mc(dev, ml, -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001885 kfree(ml);
1886 }
1887 rtnl_unlock();
1888 return 0;
1889 }
1890 }
1891 rtnl_unlock();
1892 return -EADDRNOTAVAIL;
1893}
1894
1895static void packet_flush_mclist(struct sock *sk)
1896{
1897 struct packet_sock *po = pkt_sk(sk);
1898 struct packet_mclist *ml;
1899
1900 if (!po->mclist)
1901 return;
1902
1903 rtnl_lock();
1904 while ((ml = po->mclist) != NULL) {
1905 struct net_device *dev;
1906
1907 po->mclist = ml->next;
Eric Dumazetad959e72009-10-16 06:38:46 +00001908 dev = __dev_get_by_index(sock_net(sk), ml->ifindex);
1909 if (dev != NULL)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001910 packet_dev_mc(dev, ml, -1);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001911 kfree(ml);
1912 }
1913 rtnl_unlock();
1914}
Linus Torvalds1da177e2005-04-16 15:20:36 -07001915
1916static int
David S. Millerb7058842009-09-30 16:12:20 -07001917packet_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen)
Linus Torvalds1da177e2005-04-16 15:20:36 -07001918{
1919 struct sock *sk = sock->sk;
Herbert Xu8dc41942007-02-04 23:31:32 -08001920 struct packet_sock *po = pkt_sk(sk);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001921 int ret;
1922
1923 if (level != SOL_PACKET)
1924 return -ENOPROTOOPT;
1925
Johann Baudy69e3c752009-05-18 22:11:22 -07001926 switch (optname) {
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09001927 case PACKET_ADD_MEMBERSHIP:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001928 case PACKET_DROP_MEMBERSHIP:
1929 {
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001930 struct packet_mreq_max mreq;
1931 int len = optlen;
1932 memset(&mreq, 0, sizeof(mreq));
1933 if (len < sizeof(struct packet_mreq))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001934 return -EINVAL;
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001935 if (len > sizeof(mreq))
1936 len = sizeof(mreq);
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001937 if (copy_from_user(&mreq, optval, len))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001938 return -EFAULT;
Eric W. Biederman0fb375f2005-09-21 00:11:37 -07001939 if (len < (mreq.mr_alen + offsetof(struct packet_mreq, mr_address)))
1940 return -EINVAL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07001941 if (optname == PACKET_ADD_MEMBERSHIP)
1942 ret = packet_mc_add(sk, &mreq);
1943 else
1944 ret = packet_mc_drop(sk, &mreq);
1945 return ret;
1946 }
David S. Millera2efcfa2007-05-29 13:12:50 -07001947
Linus Torvalds1da177e2005-04-16 15:20:36 -07001948 case PACKET_RX_RING:
Johann Baudy69e3c752009-05-18 22:11:22 -07001949 case PACKET_TX_RING:
Linus Torvalds1da177e2005-04-16 15:20:36 -07001950 {
1951 struct tpacket_req req;
1952
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001953 if (optlen < sizeof(req))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001954 return -EINVAL;
Sridhar Samudralabfd5f4a2010-02-04 20:24:10 -08001955 if (pkt_sk(sk)->has_vnet_hdr)
1956 return -EINVAL;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001957 if (copy_from_user(&req, optval, sizeof(req)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001958 return -EFAULT;
Johann Baudy69e3c752009-05-18 22:11:22 -07001959 return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING);
Linus Torvalds1da177e2005-04-16 15:20:36 -07001960 }
1961 case PACKET_COPY_THRESH:
1962 {
1963 int val;
1964
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001965 if (optlen != sizeof(val))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001966 return -EINVAL;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00001967 if (copy_from_user(&val, optval, sizeof(val)))
Linus Torvalds1da177e2005-04-16 15:20:36 -07001968 return -EFAULT;
1969
1970 pkt_sk(sk)->copy_thresh = val;
1971 return 0;
1972 }
Patrick McHardybbd6ef82008-07-14 22:50:15 -07001973 case PACKET_VERSION:
1974 {
1975 int val;
1976
1977 if (optlen != sizeof(val))
1978 return -EINVAL;
Johann Baudy69e3c752009-05-18 22:11:22 -07001979 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
Patrick McHardybbd6ef82008-07-14 22:50:15 -07001980 return -EBUSY;
1981 if (copy_from_user(&val, optval, sizeof(val)))
1982 return -EFAULT;
1983 switch (val) {
1984 case TPACKET_V1:
1985 case TPACKET_V2:
1986 po->tp_version = val;
1987 return 0;
1988 default:
1989 return -EINVAL;
1990 }
1991 }
Patrick McHardy8913336a2008-07-18 18:05:19 -07001992 case PACKET_RESERVE:
1993 {
1994 unsigned int val;
1995
1996 if (optlen != sizeof(val))
1997 return -EINVAL;
Johann Baudy69e3c752009-05-18 22:11:22 -07001998 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
Patrick McHardy8913336a2008-07-18 18:05:19 -07001999 return -EBUSY;
2000 if (copy_from_user(&val, optval, sizeof(val)))
2001 return -EFAULT;
2002 po->tp_reserve = val;
2003 return 0;
2004 }
Johann Baudy69e3c752009-05-18 22:11:22 -07002005 case PACKET_LOSS:
2006 {
2007 unsigned int val;
2008
2009 if (optlen != sizeof(val))
2010 return -EINVAL;
2011 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
2012 return -EBUSY;
2013 if (copy_from_user(&val, optval, sizeof(val)))
2014 return -EFAULT;
2015 po->tp_loss = !!val;
2016 return 0;
2017 }
Herbert Xu8dc41942007-02-04 23:31:32 -08002018 case PACKET_AUXDATA:
2019 {
2020 int val;
2021
2022 if (optlen < sizeof(val))
2023 return -EINVAL;
2024 if (copy_from_user(&val, optval, sizeof(val)))
2025 return -EFAULT;
2026
2027 po->auxdata = !!val;
2028 return 0;
2029 }
Peter P. Waskiewicz Jr80feaac2007-04-20 16:05:39 -07002030 case PACKET_ORIGDEV:
2031 {
2032 int val;
2033
2034 if (optlen < sizeof(val))
2035 return -EINVAL;
2036 if (copy_from_user(&val, optval, sizeof(val)))
2037 return -EFAULT;
2038
2039 po->origdev = !!val;
2040 return 0;
2041 }
Sridhar Samudralabfd5f4a2010-02-04 20:24:10 -08002042 case PACKET_VNET_HDR:
2043 {
2044 int val;
2045
2046 if (sock->type != SOCK_RAW)
2047 return -EINVAL;
2048 if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
2049 return -EBUSY;
2050 if (optlen < sizeof(val))
2051 return -EINVAL;
2052 if (copy_from_user(&val, optval, sizeof(val)))
2053 return -EFAULT;
2054
2055 po->has_vnet_hdr = !!val;
2056 return 0;
2057 }
Scott McMillan614f60f2010-06-02 05:53:56 -07002058 case PACKET_TIMESTAMP:
2059 {
2060 int val;
2061
2062 if (optlen != sizeof(val))
2063 return -EINVAL;
2064 if (copy_from_user(&val, optval, sizeof(val)))
2065 return -EFAULT;
2066
2067 po->tp_tstamp = val;
2068 return 0;
2069 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002070 default:
2071 return -ENOPROTOOPT;
2072 }
2073}
2074
2075static int packet_getsockopt(struct socket *sock, int level, int optname,
2076 char __user *optval, int __user *optlen)
2077{
2078 int len;
Herbert Xu8dc41942007-02-04 23:31:32 -08002079 int val;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002080 struct sock *sk = sock->sk;
2081 struct packet_sock *po = pkt_sk(sk);
Herbert Xu8dc41942007-02-04 23:31:32 -08002082 void *data;
2083 struct tpacket_stats st;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002084
2085 if (level != SOL_PACKET)
2086 return -ENOPROTOOPT;
2087
Kris Katterjohn8ae55f02006-01-23 16:28:02 -08002088 if (get_user(len, optlen))
2089 return -EFAULT;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002090
2091 if (len < 0)
2092 return -EINVAL;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09002093
Johann Baudy69e3c752009-05-18 22:11:22 -07002094 switch (optname) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002095 case PACKET_STATISTICS:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002096 if (len > sizeof(struct tpacket_stats))
2097 len = sizeof(struct tpacket_stats);
2098 spin_lock_bh(&sk->sk_receive_queue.lock);
2099 st = po->stats;
2100 memset(&po->stats, 0, sizeof(st));
2101 spin_unlock_bh(&sk->sk_receive_queue.lock);
2102 st.tp_packets += st.tp_drops;
2103
Herbert Xu8dc41942007-02-04 23:31:32 -08002104 data = &st;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002105 break;
Herbert Xu8dc41942007-02-04 23:31:32 -08002106 case PACKET_AUXDATA:
2107 if (len > sizeof(int))
2108 len = sizeof(int);
2109 val = po->auxdata;
2110
2111 data = &val;
2112 break;
Peter P. Waskiewicz Jr80feaac2007-04-20 16:05:39 -07002113 case PACKET_ORIGDEV:
2114 if (len > sizeof(int))
2115 len = sizeof(int);
2116 val = po->origdev;
2117
2118 data = &val;
2119 break;
Sridhar Samudralabfd5f4a2010-02-04 20:24:10 -08002120 case PACKET_VNET_HDR:
2121 if (len > sizeof(int))
2122 len = sizeof(int);
2123 val = po->has_vnet_hdr;
2124
2125 data = &val;
2126 break;
Patrick McHardybbd6ef82008-07-14 22:50:15 -07002127 case PACKET_VERSION:
2128 if (len > sizeof(int))
2129 len = sizeof(int);
2130 val = po->tp_version;
2131 data = &val;
2132 break;
2133 case PACKET_HDRLEN:
2134 if (len > sizeof(int))
2135 len = sizeof(int);
2136 if (copy_from_user(&val, optval, len))
2137 return -EFAULT;
2138 switch (val) {
2139 case TPACKET_V1:
2140 val = sizeof(struct tpacket_hdr);
2141 break;
2142 case TPACKET_V2:
2143 val = sizeof(struct tpacket2_hdr);
2144 break;
2145 default:
2146 return -EINVAL;
2147 }
2148 data = &val;
2149 break;
Patrick McHardy8913336a2008-07-18 18:05:19 -07002150 case PACKET_RESERVE:
2151 if (len > sizeof(unsigned int))
2152 len = sizeof(unsigned int);
2153 val = po->tp_reserve;
2154 data = &val;
2155 break;
Johann Baudy69e3c752009-05-18 22:11:22 -07002156 case PACKET_LOSS:
2157 if (len > sizeof(unsigned int))
2158 len = sizeof(unsigned int);
2159 val = po->tp_loss;
2160 data = &val;
2161 break;
Scott McMillan614f60f2010-06-02 05:53:56 -07002162 case PACKET_TIMESTAMP:
2163 if (len > sizeof(int))
2164 len = sizeof(int);
2165 val = po->tp_tstamp;
2166 data = &val;
2167 break;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002168 default:
2169 return -ENOPROTOOPT;
2170 }
2171
Kris Katterjohn8ae55f02006-01-23 16:28:02 -08002172 if (put_user(len, optlen))
2173 return -EFAULT;
Herbert Xu8dc41942007-02-04 23:31:32 -08002174 if (copy_to_user(optval, data, len))
2175 return -EFAULT;
Kris Katterjohn8ae55f02006-01-23 16:28:02 -08002176 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002177}
2178
2179
2180static int packet_notifier(struct notifier_block *this, unsigned long msg, void *data)
2181{
2182 struct sock *sk;
2183 struct hlist_node *node;
Jason Lunzad930652007-02-20 23:19:54 -08002184 struct net_device *dev = data;
YOSHIFUJI Hideakic346dca2008-03-25 21:47:49 +09002185 struct net *net = dev_net(dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002186
stephen hemminger808f5112010-02-22 07:57:18 +00002187 rcu_read_lock();
2188 sk_for_each_rcu(sk, node, &net->packet.sklist) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002189 struct packet_sock *po = pkt_sk(sk);
2190
2191 switch (msg) {
2192 case NETDEV_UNREGISTER:
Linus Torvalds1da177e2005-04-16 15:20:36 -07002193 if (po->mclist)
2194 packet_dev_mclist(dev, po->mclist, -1);
David S. Millera2efcfa2007-05-29 13:12:50 -07002195 /* fallthrough */
2196
Linus Torvalds1da177e2005-04-16 15:20:36 -07002197 case NETDEV_DOWN:
2198 if (dev->ifindex == po->ifindex) {
2199 spin_lock(&po->bind_lock);
2200 if (po->running) {
2201 __dev_remove_pack(&po->prot_hook);
2202 __sock_put(sk);
2203 po->running = 0;
2204 sk->sk_err = ENETDOWN;
2205 if (!sock_flag(sk, SOCK_DEAD))
2206 sk->sk_error_report(sk);
2207 }
2208 if (msg == NETDEV_UNREGISTER) {
2209 po->ifindex = -1;
2210 po->prot_hook.dev = NULL;
2211 }
2212 spin_unlock(&po->bind_lock);
2213 }
2214 break;
2215 case NETDEV_UP:
stephen hemminger808f5112010-02-22 07:57:18 +00002216 if (dev->ifindex == po->ifindex) {
2217 spin_lock(&po->bind_lock);
2218 if (po->num && !po->running) {
2219 dev_add_pack(&po->prot_hook);
2220 sock_hold(sk);
2221 po->running = 1;
2222 }
2223 spin_unlock(&po->bind_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002224 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002225 break;
2226 }
2227 }
stephen hemminger808f5112010-02-22 07:57:18 +00002228 rcu_read_unlock();
Linus Torvalds1da177e2005-04-16 15:20:36 -07002229 return NOTIFY_DONE;
2230}
2231
2232
2233static int packet_ioctl(struct socket *sock, unsigned int cmd,
2234 unsigned long arg)
2235{
2236 struct sock *sk = sock->sk;
2237
Johann Baudy69e3c752009-05-18 22:11:22 -07002238 switch (cmd) {
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002239 case SIOCOUTQ:
2240 {
2241 int amount = sk_wmem_alloc_get(sk);
Eric Dumazet31e6d362009-06-17 19:05:41 -07002242
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002243 return put_user(amount, (int __user *)arg);
2244 }
2245 case SIOCINQ:
2246 {
2247 struct sk_buff *skb;
2248 int amount = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002249
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002250 spin_lock_bh(&sk->sk_receive_queue.lock);
2251 skb = skb_peek(&sk->sk_receive_queue);
2252 if (skb)
2253 amount = skb->len;
2254 spin_unlock_bh(&sk->sk_receive_queue.lock);
2255 return put_user(amount, (int __user *)arg);
2256 }
2257 case SIOCGSTAMP:
2258 return sock_get_timestamp(sk, (struct timeval __user *)arg);
2259 case SIOCGSTAMPNS:
2260 return sock_get_timestampns(sk, (struct timespec __user *)arg);
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09002261
Linus Torvalds1da177e2005-04-16 15:20:36 -07002262#ifdef CONFIG_INET
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002263 case SIOCADDRT:
2264 case SIOCDELRT:
2265 case SIOCDARP:
2266 case SIOCGARP:
2267 case SIOCSARP:
2268 case SIOCGIFADDR:
2269 case SIOCSIFADDR:
2270 case SIOCGIFBRDADDR:
2271 case SIOCSIFBRDADDR:
2272 case SIOCGIFNETMASK:
2273 case SIOCSIFNETMASK:
2274 case SIOCGIFDSTADDR:
2275 case SIOCSIFDSTADDR:
2276 case SIOCSIFFLAGS:
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002277 return inet_dgram_ops.ioctl(sock, cmd, arg);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002278#endif
2279
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002280 default:
2281 return -ENOIOCTLCMD;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002282 }
2283 return 0;
2284}
2285
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002286static unsigned int packet_poll(struct file *file, struct socket *sock,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002287 poll_table *wait)
2288{
2289 struct sock *sk = sock->sk;
2290 struct packet_sock *po = pkt_sk(sk);
2291 unsigned int mask = datagram_poll(file, sock, wait);
2292
2293 spin_lock_bh(&sk->sk_receive_queue.lock);
Johann Baudy69e3c752009-05-18 22:11:22 -07002294 if (po->rx_ring.pg_vec) {
2295 if (!packet_previous_frame(po, &po->rx_ring, TP_STATUS_KERNEL))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002296 mask |= POLLIN | POLLRDNORM;
2297 }
2298 spin_unlock_bh(&sk->sk_receive_queue.lock);
Johann Baudy69e3c752009-05-18 22:11:22 -07002299 spin_lock_bh(&sk->sk_write_queue.lock);
2300 if (po->tx_ring.pg_vec) {
2301 if (packet_current_frame(po, &po->tx_ring, TP_STATUS_AVAILABLE))
2302 mask |= POLLOUT | POLLWRNORM;
2303 }
2304 spin_unlock_bh(&sk->sk_write_queue.lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002305 return mask;
2306}
2307
2308
2309/* Dirty? Well, I still did not learn better way to account
2310 * for user mmaps.
2311 */
2312
2313static void packet_mm_open(struct vm_area_struct *vma)
2314{
2315 struct file *file = vma->vm_file;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002316 struct socket *sock = file->private_data;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002317 struct sock *sk = sock->sk;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09002318
Linus Torvalds1da177e2005-04-16 15:20:36 -07002319 if (sk)
2320 atomic_inc(&pkt_sk(sk)->mapped);
2321}
2322
2323static void packet_mm_close(struct vm_area_struct *vma)
2324{
2325 struct file *file = vma->vm_file;
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002326 struct socket *sock = file->private_data;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002327 struct sock *sk = sock->sk;
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09002328
Linus Torvalds1da177e2005-04-16 15:20:36 -07002329 if (sk)
2330 atomic_dec(&pkt_sk(sk)->mapped);
2331}
2332
Alexey Dobriyanf0f37e22009-09-27 22:29:37 +04002333static const struct vm_operations_struct packet_mmap_ops = {
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002334 .open = packet_mm_open,
2335 .close = packet_mm_close,
Linus Torvalds1da177e2005-04-16 15:20:36 -07002336};
2337
Neil Horman0e3125c2010-11-16 10:26:47 -08002338static void free_pg_vec(struct pgv *pg_vec, unsigned int order,
2339 unsigned int len)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002340{
2341 int i;
2342
David S. Miller4ebf0ae2005-12-06 16:38:35 -08002343 for (i = 0; i < len; i++) {
Neil Horman0e3125c2010-11-16 10:26:47 -08002344 if (likely(pg_vec[i].buffer)) {
2345 if (pg_vec[i].flags & PGV_FROM_VMALLOC)
2346 vfree(pg_vec[i].buffer);
2347 else
2348 free_pages((unsigned long)pg_vec[i].buffer,
2349 order);
2350 pg_vec[i].buffer = NULL;
2351 }
Linus Torvalds1da177e2005-04-16 15:20:36 -07002352 }
2353 kfree(pg_vec);
2354}
2355
Neil Horman0e3125c2010-11-16 10:26:47 -08002356static inline char *alloc_one_pg_vec_page(unsigned long order,
2357 unsigned char *flags)
David S. Miller4ebf0ae2005-12-06 16:38:35 -08002358{
Neil Horman0e3125c2010-11-16 10:26:47 -08002359 char *buffer = NULL;
2360 gfp_t gfp_flags = GFP_KERNEL | __GFP_COMP |
2361 __GFP_ZERO | __GFP_NOWARN | __GFP_NORETRY;
Eric Dumazet719bfea2009-04-15 03:39:52 -07002362
Neil Horman0e3125c2010-11-16 10:26:47 -08002363 buffer = (char *) __get_free_pages(gfp_flags, order);
2364
2365 if (buffer)
2366 return buffer;
2367
2368 /*
2369 * __get_free_pages failed, fall back to vmalloc
2370 */
2371 *flags |= PGV_FROM_VMALLOC;
Eric Dumazetbbce5a52010-11-20 07:31:54 +00002372 buffer = vzalloc((1 << order) * PAGE_SIZE);
Neil Horman0e3125c2010-11-16 10:26:47 -08002373
2374 if (buffer)
2375 return buffer;
2376
2377 /*
2378 * vmalloc failed, lets dig into swap here
2379 */
2380 *flags = 0;
2381 gfp_flags &= ~__GFP_NORETRY;
2382 buffer = (char *)__get_free_pages(gfp_flags, order);
2383 if (buffer)
2384 return buffer;
2385
2386 /*
2387 * complete and utter failure
2388 */
2389 return NULL;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08002390}
2391
Neil Horman0e3125c2010-11-16 10:26:47 -08002392static struct pgv *alloc_pg_vec(struct tpacket_req *req, int order)
David S. Miller4ebf0ae2005-12-06 16:38:35 -08002393{
2394 unsigned int block_nr = req->tp_block_nr;
Neil Horman0e3125c2010-11-16 10:26:47 -08002395 struct pgv *pg_vec;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08002396 int i;
2397
Neil Horman0e3125c2010-11-16 10:26:47 -08002398 pg_vec = kcalloc(block_nr, sizeof(struct pgv), GFP_KERNEL);
David S. Miller4ebf0ae2005-12-06 16:38:35 -08002399 if (unlikely(!pg_vec))
2400 goto out;
2401
2402 for (i = 0; i < block_nr; i++) {
Neil Horman0e3125c2010-11-16 10:26:47 -08002403 pg_vec[i].buffer = alloc_one_pg_vec_page(order,
2404 &pg_vec[i].flags);
2405 if (unlikely(!pg_vec[i].buffer))
David S. Miller4ebf0ae2005-12-06 16:38:35 -08002406 goto out_free_pgvec;
2407 }
2408
2409out:
2410 return pg_vec;
2411
2412out_free_pgvec:
2413 free_pg_vec(pg_vec, order, block_nr);
Neil Horman0e3125c2010-11-16 10:26:47 -08002414 kfree(pg_vec);
David S. Miller4ebf0ae2005-12-06 16:38:35 -08002415 pg_vec = NULL;
2416 goto out;
2417}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002418
Johann Baudy69e3c752009-05-18 22:11:22 -07002419static int packet_set_ring(struct sock *sk, struct tpacket_req *req,
2420 int closing, int tx_ring)
Linus Torvalds1da177e2005-04-16 15:20:36 -07002421{
Neil Horman0e3125c2010-11-16 10:26:47 -08002422 struct pgv *pg_vec = NULL;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002423 struct packet_sock *po = pkt_sk(sk);
Al Viro0e11c912006-11-08 00:26:29 -08002424 int was_running, order = 0;
Johann Baudy69e3c752009-05-18 22:11:22 -07002425 struct packet_ring_buffer *rb;
2426 struct sk_buff_head *rb_queue;
Al Viro0e11c912006-11-08 00:26:29 -08002427 __be16 num;
Johann Baudy69e3c752009-05-18 22:11:22 -07002428 int err;
2429
2430 rb = tx_ring ? &po->tx_ring : &po->rx_ring;
2431 rb_queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
2432
2433 err = -EBUSY;
2434 if (!closing) {
2435 if (atomic_read(&po->mapped))
2436 goto out;
2437 if (atomic_read(&rb->pending))
2438 goto out;
2439 }
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09002440
Linus Torvalds1da177e2005-04-16 15:20:36 -07002441 if (req->tp_block_nr) {
Linus Torvalds1da177e2005-04-16 15:20:36 -07002442 /* Sanity tests and some calculations */
Johann Baudy69e3c752009-05-18 22:11:22 -07002443 err = -EBUSY;
2444 if (unlikely(rb->pg_vec))
2445 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002446
Patrick McHardybbd6ef82008-07-14 22:50:15 -07002447 switch (po->tp_version) {
2448 case TPACKET_V1:
2449 po->tp_hdrlen = TPACKET_HDRLEN;
2450 break;
2451 case TPACKET_V2:
2452 po->tp_hdrlen = TPACKET2_HDRLEN;
2453 break;
2454 }
2455
Johann Baudy69e3c752009-05-18 22:11:22 -07002456 err = -EINVAL;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08002457 if (unlikely((int)req->tp_block_size <= 0))
Johann Baudy69e3c752009-05-18 22:11:22 -07002458 goto out;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08002459 if (unlikely(req->tp_block_size & (PAGE_SIZE - 1)))
Johann Baudy69e3c752009-05-18 22:11:22 -07002460 goto out;
Patrick McHardy8913336a2008-07-18 18:05:19 -07002461 if (unlikely(req->tp_frame_size < po->tp_hdrlen +
Johann Baudy69e3c752009-05-18 22:11:22 -07002462 po->tp_reserve))
2463 goto out;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08002464 if (unlikely(req->tp_frame_size & (TPACKET_ALIGNMENT - 1)))
Johann Baudy69e3c752009-05-18 22:11:22 -07002465 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002466
Johann Baudy69e3c752009-05-18 22:11:22 -07002467 rb->frames_per_block = req->tp_block_size/req->tp_frame_size;
2468 if (unlikely(rb->frames_per_block <= 0))
2469 goto out;
2470 if (unlikely((rb->frames_per_block * req->tp_block_nr) !=
2471 req->tp_frame_nr))
2472 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002473
2474 err = -ENOMEM;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08002475 order = get_order(req->tp_block_size);
2476 pg_vec = alloc_pg_vec(req, order);
2477 if (unlikely(!pg_vec))
Linus Torvalds1da177e2005-04-16 15:20:36 -07002478 goto out;
Johann Baudy69e3c752009-05-18 22:11:22 -07002479 }
2480 /* Done */
2481 else {
2482 err = -EINVAL;
David S. Miller4ebf0ae2005-12-06 16:38:35 -08002483 if (unlikely(req->tp_frame_nr))
Johann Baudy69e3c752009-05-18 22:11:22 -07002484 goto out;
Linus Torvalds1da177e2005-04-16 15:20:36 -07002485 }
2486
2487 lock_sock(sk);
2488
2489 /* Detach socket from network */
2490 spin_lock(&po->bind_lock);
2491 was_running = po->running;
2492 num = po->num;
2493 if (was_running) {
2494 __dev_remove_pack(&po->prot_hook);
2495 po->num = 0;
2496 po->running = 0;
2497 __sock_put(sk);
2498 }
2499 spin_unlock(&po->bind_lock);
YOSHIFUJI Hideaki1ce4f282007-02-09 23:25:10 +09002500
Linus Torvalds1da177e2005-04-16 15:20:36 -07002501 synchronize_net();
2502
2503 err = -EBUSY;
Herbert Xu905db442009-01-30 14:12:06 -08002504 mutex_lock(&po->pg_vec_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002505 if (closing || atomic_read(&po->mapped) == 0) {
2506 err = 0;
2507#define XC(a, b) ({ __typeof__ ((a)) __t; __t = (a); (a) = (b); __t; })
Johann Baudy69e3c752009-05-18 22:11:22 -07002508 spin_lock_bh(&rb_queue->lock);
2509 pg_vec = XC(rb->pg_vec, pg_vec);
2510 rb->frame_max = (req->tp_frame_nr - 1);
2511 rb->head = 0;
2512 rb->frame_size = req->tp_frame_size;
2513 spin_unlock_bh(&rb_queue->lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002514
Johann Baudy69e3c752009-05-18 22:11:22 -07002515 order = XC(rb->pg_vec_order, order);
2516 req->tp_block_nr = XC(rb->pg_vec_len, req->tp_block_nr);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002517
Johann Baudy69e3c752009-05-18 22:11:22 -07002518 rb->pg_vec_pages = req->tp_block_size/PAGE_SIZE;
2519 po->prot_hook.func = (po->rx_ring.pg_vec) ?
2520 tpacket_rcv : packet_rcv;
2521 skb_queue_purge(rb_queue);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002522#undef XC
2523 if (atomic_read(&po->mapped))
Eric Dumazet40d4e3d2009-07-21 21:57:59 +00002524 pr_err("packet_mmap: vma is busy: %d\n",
2525 atomic_read(&po->mapped));
Linus Torvalds1da177e2005-04-16 15:20:36 -07002526 }
Herbert Xu905db442009-01-30 14:12:06 -08002527 mutex_unlock(&po->pg_vec_lock);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002528
2529 spin_lock(&po->bind_lock);
2530 if (was_running && !po->running) {
2531 sock_hold(sk);
2532 po->running = 1;
2533 po->num = num;
2534 dev_add_pack(&po->prot_hook);
2535 }
2536 spin_unlock(&po->bind_lock);
2537
2538 release_sock(sk);
2539
Linus Torvalds1da177e2005-04-16 15:20:36 -07002540 if (pg_vec)
2541 free_pg_vec(pg_vec, order, req->tp_block_nr);
2542out:
2543 return err;
2544}
2545
/*
 * packet_mmap - map a socket's rx/tx rings into a process address space.
 *
 * The caller's vma must start at offset 0 and span *exactly* the combined
 * size of every ring that has been set up (rx then tx); partial mappings
 * are rejected with -EINVAL.  Each ring page is inserted individually with
 * vm_insert_page(), so the pages may come from either the page allocator
 * or vmalloc (PGV_FROM_VMALLOC flag).  On success po->mapped is bumped,
 * which blocks packet_set_ring() from tearing the rings down while the
 * mapping exists.  Serialized against ring setup by po->pg_vec_lock.
 */
static int packet_mmap(struct file *file, struct socket *sock,
		struct vm_area_struct *vma)
{
	struct sock *sk = sock->sk;
	struct packet_sock *po = pkt_sk(sk);
	unsigned long size, expected_size;
	struct packet_ring_buffer *rb;
	unsigned long start;
	int err = -EINVAL;
	int i;

	/* Mappings must begin at the start of the ring area. */
	if (vma->vm_pgoff)
		return -EINVAL;

	mutex_lock(&po->pg_vec_lock);

	/*
	 * Sum the total size of all configured rings.  Note: relies on
	 * rx_ring and tx_ring being adjacent members of packet_sock so
	 * the pointer walk visits exactly those two.
	 */
	expected_size = 0;
	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
		if (rb->pg_vec) {
			expected_size += rb->pg_vec_len
						* rb->pg_vec_pages
						* PAGE_SIZE;
		}
	}

	/* No ring configured at all -> nothing to map. */
	if (expected_size == 0)
		goto out;

	/* The vma must cover the rings exactly, no more and no less. */
	size = vma->vm_end - vma->vm_start;
	if (size != expected_size)
		goto out;

	/* Insert every page of every block of both rings, in order. */
	start = vma->vm_start;
	for (rb = &po->rx_ring; rb <= &po->tx_ring; rb++) {
		if (rb->pg_vec == NULL)
			continue;

		for (i = 0; i < rb->pg_vec_len; i++) {
			struct page *page;
			void *kaddr = rb->pg_vec[i].buffer;
			int pg_num;

			for (pg_num = 0; pg_num < rb->pg_vec_pages;
					pg_num++) {
				/* Block memory may be vmalloc'ed or linear. */
				if (rb->pg_vec[i].flags & PGV_FROM_VMALLOC)
					page = vmalloc_to_page(kaddr);
				else
					page = virt_to_page(kaddr);

				err = vm_insert_page(vma, start, page);
				if (unlikely(err))
					goto out;
				start += PAGE_SIZE;
				kaddr += PAGE_SIZE;
			}
		}
	}

	/* Pin the rings: packet_set_ring() refuses while mapped != 0. */
	atomic_inc(&po->mapped);
	vma->vm_ops = &packet_mmap_ops;
	err = 0;

out:
	mutex_unlock(&po->pg_vec_lock);
	return err;
}
Linus Torvalds1da177e2005-04-16 15:20:36 -07002612
/*
 * Ops vector for obsolete SOCK_PACKET sockets.  Only release, bind,
 * getname, sendmsg and recvmsg have packet-specific implementations;
 * everything else is a sock_no_* stub.  Notably mmap is refused, since
 * SOCK_PACKET sockets have no ring buffers.
 */
static const struct proto_ops packet_ops_spkt = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind_spkt,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname_spkt,
	.poll =		datagram_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	packet_sendmsg_spkt,
	.recvmsg =	packet_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
};
Linus Torvalds1da177e2005-04-16 15:20:36 -07002633
/*
 * Ops vector for SOCK_RAW/SOCK_DGRAM packet sockets: the full-featured
 * interface with setsockopt/getsockopt (ring setup, filters, ...),
 * mmap'ed rx/tx rings and packet-aware poll.
 */
static const struct proto_ops packet_ops = {
	.family =	PF_PACKET,
	.owner =	THIS_MODULE,
	.release =	packet_release,
	.bind =		packet_bind,
	.connect =	sock_no_connect,
	.socketpair =	sock_no_socketpair,
	.accept =	sock_no_accept,
	.getname =	packet_getname,
	.poll =		packet_poll,
	.ioctl =	packet_ioctl,
	.listen =	sock_no_listen,
	.shutdown =	sock_no_shutdown,
	.setsockopt =	packet_setsockopt,
	.getsockopt =	packet_getsockopt,
	.sendmsg =	packet_sendmsg,
	.recvmsg =	packet_recvmsg,
	.mmap =		packet_mmap,
	.sendpage =	sock_no_sendpage,
};
2654
/* Registered with sock_register() so socket(PF_PACKET, ...) reaches us. */
static const struct net_proto_family packet_family_ops = {
	.family =	PF_PACKET,
	.create =	packet_create,
	.owner	=	THIS_MODULE,
};
2660
/* Watches netdevice events (unregister, down) to unbind affected sockets. */
static struct notifier_block packet_netdev_notifier = {
	.notifier_call =	packet_notifier,
};
2664
2665#ifdef CONFIG_PROC_FS
Linus Torvalds1da177e2005-04-16 15:20:36 -07002666
/*
 * seq_file iterator start for /proc/net/packet: take the RCU read lock
 * (released in packet_seq_stop) and position on entry *pos of this
 * namespace's packet socket list; returns SEQ_START_TOKEN at pos 0.
 */
static void *packet_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	struct net *net = seq_file_net(seq);

	rcu_read_lock();
	return seq_hlist_start_head_rcu(&net->packet.sklist, *pos);
}
2675
2676static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2677{
Herbert Xu1bf40952007-12-16 14:04:02 -08002678 struct net *net = seq_file_net(seq);
stephen hemminger808f5112010-02-22 07:57:18 +00002679 return seq_hlist_next_rcu(v, &net->packet.sklist, pos);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002680}
2681
/* seq_file iterator stop: drop the RCU lock taken in packet_seq_start(). */
static void packet_seq_stop(struct seq_file *seq, void *v)
	__releases(RCU)
{
	rcu_read_unlock();
}
2687
/*
 * Emit one /proc/net/packet line per socket (or the column header for
 * SEQ_START_TOKEN).  The format string is user-visible ABI consumed by
 * tools such as ss/netstat -- do not change it.
 */
static int packet_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "sk       RefCnt Type Proto  Iface R Rmem   User   Inode\n");
	else {
		struct sock *s = sk_entry(v);
		const struct packet_sock *po = pkt_sk(s);

		seq_printf(seq,
			   "%p %-6d %-4d %04x   %-5d %1d %-6u %-6u %-6lu\n",
			   s,
			   atomic_read(&s->sk_refcnt),
			   s->sk_type,
			   ntohs(po->num),	/* protocol, stored big-endian */
			   po->ifindex,
			   po->running,
			   atomic_read(&s->sk_rmem_alloc),
			   sock_i_uid(s),
			   sock_i_ino(s));
	}

	return 0;
}
2711
/* Iterator callbacks tying the /proc/net/packet seq_file together. */
static const struct seq_operations packet_seq_ops = {
	.start	= packet_seq_start,
	.next	= packet_seq_next,
	.stop	= packet_seq_stop,
	.show	= packet_seq_show,
};
2718
/* open() for /proc/net/packet: per-netns seq_file with private state. */
static int packet_seq_open(struct inode *inode, struct file *file)
{
	return seq_open_net(inode, file, &packet_seq_ops,
			    sizeof(struct seq_net_private));
}
2724
/* file_operations for /proc/net/packet, registered in packet_net_init(). */
static const struct file_operations packet_seq_fops = {
	.owner		= THIS_MODULE,
	.open		= packet_seq_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= seq_release_net,
};
2732
2733#endif
2734
/*
 * Per-network-namespace init: set up this namespace's packet socket
 * list and create its /proc/net/packet entry.  Returns -ENOMEM if the
 * proc entry cannot be created.
 */
static int __net_init packet_net_init(struct net *net)
{
	spin_lock_init(&net->packet.sklist_lock);
	INIT_HLIST_HEAD(&net->packet.sklist);

	if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops))
		return -ENOMEM;

	return 0;
}
2745
/* Per-namespace teardown: remove the /proc/net/packet entry. */
static void __net_exit packet_net_exit(struct net *net)
{
	proc_net_remove(net, "packet");
}
2750
/* Hooks run for every network namespace as it is created/destroyed. */
static struct pernet_operations packet_net_ops = {
	.init = packet_net_init,
	.exit = packet_net_exit,
};
2755
2756
/*
 * Module unload: tear down in the reverse order of packet_init() --
 * notifier first so no new device events arrive, then the per-netns
 * state, the socket family, and finally the proto itself.
 */
static void __exit packet_exit(void)
{
	unregister_netdevice_notifier(&packet_netdev_notifier);
	unregister_pernet_subsys(&packet_net_ops);
	sock_unregister(PF_PACKET);
	proto_unregister(&packet_proto);
}
2764
2765static int __init packet_init(void)
2766{
2767 int rc = proto_register(&packet_proto, 0);
2768
2769 if (rc != 0)
2770 goto out;
2771
2772 sock_register(&packet_family_ops);
Denis V. Lunevd12d01d2007-11-19 22:28:35 -08002773 register_pernet_subsys(&packet_net_ops);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002774 register_netdevice_notifier(&packet_netdev_notifier);
Linus Torvalds1da177e2005-04-16 15:20:36 -07002775out:
2776 return rc;
2777}
2778
2779module_init(packet_init);
2780module_exit(packet_exit);
2781MODULE_LICENSE("GPL");
2782MODULE_ALIAS_NETPROTO(PF_PACKET);