blob: f2a6e713224153bb5e6861aebce481089d227597 [file] [log] [blame]
/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Pseudo-driver for the loopback interface.
 *
 * Version:	@(#)loopback.c	1.0.4b	08/16/93
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Donald Becker, <becker@scyld.com>
 *
 *		Alan Cox	:	Fixed oddments for NET3.014
 *		Alan Cox	:	Rejig for NET3.029 snap #3
 *		Alan Cox	:	Fixed NET3.029 bugs and sped up
 *		Larry McVoy	:	Tiny tweak to double performance
 *		Alan Cox	:	Backed out LMV's tweak - the linux mm
 *					can't take it...
 *		Michael Griffith:	Don't bother computing the checksums
 *					on packets received on the loopback
 *					interface.
 *		Alexey Kuznetsov:	Potential hang under some extreme
 *					cases removed.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
31#include <linux/kernel.h>
32#include <linux/jiffies.h>
33#include <linux/module.h>
34#include <linux/interrupt.h>
35#include <linux/fs.h>
36#include <linux/types.h>
37#include <linux/string.h>
38#include <linux/socket.h>
39#include <linux/errno.h>
40#include <linux/fcntl.h>
41#include <linux/in.h>
42#include <linux/init.h>
43
44#include <asm/system.h>
45#include <asm/uaccess.h>
46#include <asm/io.h>
47
48#include <linux/inet.h>
49#include <linux/netdevice.h>
50#include <linux/etherdevice.h>
51#include <linux/skbuff.h>
52#include <linux/ethtool.h>
53#include <net/sock.h>
54#include <net/checksum.h>
55#include <linux/if_ether.h> /* For the statistics structure. */
56#include <linux/if_arp.h> /* For ARPHRD_ETHER */
57#include <linux/ip.h>
58#include <linux/tcp.h>
59#include <linux/percpu.h>
Eric W. Biederman2774c7a2007-09-26 22:10:56 -070060#include <net/net_namespace.h>
Linus Torvalds1da177e2005-04-16 15:20:36 -070061
/*
 * Per-CPU loopback traffic counters.  One instance exists per possible CPU;
 * get_stats() sums them to produce the device-wide totals.
 */
struct pcpu_lstats {
	unsigned long packets;	/* frames counted by loopback_xmit() */
	unsigned long bytes;	/* sum of skb->len of those frames */
};
Linus Torvalds1da177e2005-04-16 15:20:36 -070066
/* KISS: just allocate small chunks and copy bits.
 *
 * So, in fact, this is documentation, explaining what we expect
 * of a large-send device, modulo the TCP checksum, which is ignored
 * for loopback.
 */
72
#ifdef LOOPBACK_TSO
/*
 * Software emulation of TCP large-send offload.
 *
 * Splits @skb (an IPv4/TCP packet whose data starts at the network header)
 * into segments that each carry at most gso_size bytes of TCP payload.
 * Every segment gets its own copy of the IP + TCP headers with tot_len, id,
 * the IP header checksum and the TCP sequence number rewritten, and is handed
 * to netif_rx().  FIN and PSH are cleared on every segment except the last.
 *
 * If a segment cannot be allocated the loop stops and the remaining payload
 * is not delivered.  @skb is always freed before returning.
 */
static void emulate_large_send_offload(struct sk_buff *skb)
{
	struct iphdr *iph = ip_hdr(skb);
	struct tcphdr *th = (struct tcphdr *)(skb_network_header(skb) +
					      (iph->ihl * 4));
	/* Combined length of the IP and TCP headers, in bytes. */
	unsigned int doffset = (iph->ihl + th->doff) * 4;
	/* Largest segment produced: headers plus gso_size payload bytes. */
	unsigned int mtu = skb_shinfo(skb)->gso_size + doffset;
	/* Payload bytes already emitted. */
	unsigned int offset = 0;
	u32 seq = ntohl(th->seq);
	u16 id = ntohs(iph->id);

	while (offset + doffset < skb->len) {
		unsigned int frag_size = min(mtu, skb->len - offset) - doffset;
		struct sk_buff *nskb = alloc_skb(mtu + 32, GFP_ATOMIC);

		if (!nskb)
			break;

		/* Leave 32 bytes of headroom in front of the headers. */
		skb_reserve(nskb, 32);
		skb_set_mac_header(nskb, -ETH_HLEN);
		skb_reset_network_header(nskb);
		iph = ip_hdr(nskb);

		/* Headers come from the original, payload from the current offset. */
		skb_copy_to_linear_data(nskb, skb_network_header(skb),
					doffset);
		if (skb_copy_bits(skb,
				  doffset + offset,
				  nskb->data + doffset,
				  frag_size))
			BUG();
		skb_put(nskb, doffset + frag_size);

		/* Carry the original's metadata over to the segment. */
		nskb->ip_summed = CHECKSUM_UNNECESSARY;
		nskb->dev = skb->dev;
		nskb->priority = skb->priority;
		nskb->protocol = skb->protocol;
		nskb->dst = dst_clone(skb->dst);
		memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
		nskb->pkt_type = skb->pkt_type;

		/* Patch the per-segment header fields. */
		th = (struct tcphdr *)(skb_network_header(nskb) + iph->ihl * 4);
		iph->tot_len = htons(frag_size + doffset);
		iph->id = htons(id);
		iph->check = 0;
		iph->check = ip_fast_csum((unsigned char *) iph, iph->ihl);
		th->seq = htonl(seq);
		/* Only the final segment may keep FIN/PSH. */
		if (offset + doffset + frag_size < skb->len)
			th->fin = th->psh = 0;

		netif_rx(nskb);
		offset += frag_size;
		seq += frag_size;
		id++;
	}

	dev_kfree_skb(skb);
}
#endif /* LOOPBACK_TSO */
Linus Torvalds1da177e2005-04-16 15:20:36 -0700128
129/*
130 * The higher levels take care of making this non-reentrant (it's
131 * called with bh's disabled).
132 */
133static int loopback_xmit(struct sk_buff *skb, struct net_device *dev)
134{
Eric W. Biederman5f6d88b2007-09-26 22:08:12 -0700135 struct pcpu_lstats *pcpu_lstats, *lb_stats;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700136
137 skb_orphan(skb);
138
Chuck Ebbert0e920bf2005-07-02 21:28:23 -0400139 skb->protocol = eth_type_trans(skb,dev);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700140#ifndef LOOPBACK_MUST_CHECKSUM
141 skb->ip_summed = CHECKSUM_UNNECESSARY;
142#endif
143
Chuck Ebbertd2ae1d22005-07-02 21:28:21 -0400144#ifdef LOOPBACK_TSO
Herbert Xu89114af2006-07-08 13:34:32 -0700145 if (skb_is_gso(skb)) {
Linus Torvalds1da177e2005-04-16 15:20:36 -0700146 BUG_ON(skb->protocol != htons(ETH_P_IP));
Arnaldo Carvalho de Meloeddc9ec2007-04-20 22:47:35 -0700147 BUG_ON(ip_hdr(skb)->protocol != IPPROTO_TCP);
Linus Torvalds1da177e2005-04-16 15:20:36 -0700148
149 emulate_large_send_offload(skb);
150 return 0;
151 }
Chuck Ebbertd2ae1d22005-07-02 21:28:21 -0400152#endif
Linus Torvalds1da177e2005-04-16 15:20:36 -0700153 dev->last_rx = jiffies;
154
Eric W. Biederman9e0db4b2007-09-27 17:09:39 -0700155 /* it's OK to use per_cpu_ptr() because BHs are off */
Eric W. Biederman5f6d88b2007-09-26 22:08:12 -0700156 pcpu_lstats = netdev_priv(dev);
157 lb_stats = per_cpu_ptr(pcpu_lstats, smp_processor_id());
Eric Dumazet5175c372006-10-18 20:51:57 -0700158 lb_stats->bytes += skb->len;
159 lb_stats->packets++;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700160
161 netif_rx(skb);
162
Eric Dumazet58f53972006-10-20 00:32:41 -0700163 return 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700164}
165
166static struct net_device_stats *get_stats(struct net_device *dev)
167{
Eric W. Biederman5f6d88b2007-09-26 22:08:12 -0700168 const struct pcpu_lstats *pcpu_lstats;
Eric Dumazet33036802007-04-10 13:25:40 -0700169 struct net_device_stats *stats = &dev->stats;
Eric Dumazet5175c372006-10-18 20:51:57 -0700170 unsigned long bytes = 0;
171 unsigned long packets = 0;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700172 int i;
173
Eric W. Biederman5f6d88b2007-09-26 22:08:12 -0700174 pcpu_lstats = netdev_priv(dev);
KAMEZAWA Hiroyuki0fed4842006-03-28 01:56:37 -0800175 for_each_possible_cpu(i) {
Eric Dumazet5175c372006-10-18 20:51:57 -0700176 const struct pcpu_lstats *lb_stats;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700177
Eric W. Biederman5f6d88b2007-09-26 22:08:12 -0700178 lb_stats = per_cpu_ptr(pcpu_lstats, i);
Eric Dumazet5175c372006-10-18 20:51:57 -0700179 bytes += lb_stats->bytes;
180 packets += lb_stats->packets;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700181 }
Eric Dumazet5175c372006-10-18 20:51:57 -0700182 stats->rx_packets = packets;
183 stats->tx_packets = packets;
184 stats->rx_bytes = bytes;
185 stats->tx_bytes = bytes;
Linus Torvalds1da177e2005-04-16 15:20:36 -0700186 return stats;
187}
188
Stephen Hemminger7fa6b062006-09-27 20:33:34 -0700189static u32 always_on(struct net_device *dev)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700190{
191 return 1;
192}
193
Jeff Garzik7282d492006-09-13 14:30:00 -0400194static const struct ethtool_ops loopback_ethtool_ops = {
Stephen Hemminger7fa6b062006-09-27 20:33:34 -0700195 .get_link = always_on,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700196 .set_tso = ethtool_op_set_tso,
Stephen Hemminger7fa6b062006-09-27 20:33:34 -0700197 .get_tx_csum = always_on,
198 .get_sg = always_on,
199 .get_rx_csum = always_on,
Linus Torvalds1da177e2005-04-16 15:20:36 -0700200};
201
Eric W. Biederman5f6d88b2007-09-26 22:08:12 -0700202static int loopback_dev_init(struct net_device *dev)
203{
204 struct pcpu_lstats *lstats;
205
206 lstats = alloc_percpu(struct pcpu_lstats);
207 if (!lstats)
208 return -ENOMEM;
209
210 dev->priv = lstats;
211 return 0;
212}
213
/*
 * dev->destructor hook: releases the per-CPU statistics block and then the
 * net_device itself.
 */
static void loopback_dev_free(struct net_device *dev)
{
	struct pcpu_lstats *lstats = netdev_priv(dev);

	free_percpu(lstats);
	free_netdev(dev);
}
221
Stephen Hemminger7fa6b062006-09-27 20:33:34 -0700222/*
Eric W. Biederman9e0db4b2007-09-27 17:09:39 -0700223 * The loopback device is special. There is only one instance
224 * per network namespace.
Stephen Hemminger7fa6b062006-09-27 20:33:34 -0700225 */
Daniel Lezcano854d8362007-09-25 19:18:04 -0700226static void loopback_setup(struct net_device *dev)
227{
228 dev->get_stats = &get_stats;
229 dev->mtu = (16 * 1024) + 20 + 20 + 12;
230 dev->hard_start_xmit = loopback_xmit;
Daniel Lezcano854d8362007-09-25 19:18:04 -0700231 dev->hard_header_len = ETH_HLEN; /* 14 */
232 dev->addr_len = ETH_ALEN; /* 6 */
233 dev->tx_queue_len = 0;
234 dev->type = ARPHRD_LOOPBACK; /* 0x0001*/
Daniel Lezcano854d8362007-09-25 19:18:04 -0700235 dev->flags = IFF_LOOPBACK;
236 dev->features = NETIF_F_SG | NETIF_F_FRAGLIST
Chuck Ebbertd2ae1d22005-07-02 21:28:21 -0400237#ifdef LOOPBACK_TSO
Daniel Lezcano854d8362007-09-25 19:18:04 -0700238 | NETIF_F_TSO
Chuck Ebbertd2ae1d22005-07-02 21:28:21 -0400239#endif
Daniel Lezcano854d8362007-09-25 19:18:04 -0700240 | NETIF_F_NO_CSUM
241 | NETIF_F_HIGHDMA
242 | NETIF_F_LLTX
Emil Medve2d2c54e2007-12-27 08:17:22 -0600243 | NETIF_F_NETNS_LOCAL;
Daniel Lezcano854d8362007-09-25 19:18:04 -0700244 dev->ethtool_ops = &loopback_ethtool_ops;
Stephen Hemminger3b04ddd2007-10-09 01:40:57 -0700245 dev->header_ops = &eth_header_ops;
Eric W. Biederman5f6d88b2007-09-26 22:08:12 -0700246 dev->init = loopback_dev_init;
247 dev->destructor = loopback_dev_free;
Daniel Lezcano854d8362007-09-25 19:18:04 -0700248}
Daniel Lezcanode3cb742007-09-25 19:16:28 -0700249
Ralf Baechle22783642005-08-18 14:05:18 -0700250/* Setup and register the loopback device. */
Pavel Emelyanov46650792007-10-08 20:38:39 -0700251static __net_init int loopback_net_init(struct net *net)
Linus Torvalds1da177e2005-04-16 15:20:36 -0700252{
Daniel Lezcano854d8362007-09-25 19:18:04 -0700253 struct net_device *dev;
254 int err;
Herbert Xuaeed9e82007-07-30 16:37:19 -0700255
Daniel Lezcano854d8362007-09-25 19:18:04 -0700256 err = -ENOMEM;
257 dev = alloc_netdev(0, "lo", loopback_setup);
258 if (!dev)
259 goto out;
260
Eric W. Biederman2774c7a2007-09-26 22:10:56 -0700261 dev->nd_net = net;
Daniel Lezcano854d8362007-09-25 19:18:04 -0700262 err = register_netdev(dev);
263 if (err)
264 goto out_free_netdev;
265
Eric W. Biederman2774c7a2007-09-26 22:10:56 -0700266 net->loopback_dev = dev;
Pavel Emelyanov9d6dda322007-10-15 12:55:33 -0700267 return 0;
Daniel Lezcano854d8362007-09-25 19:18:04 -0700268
Linus Torvalds1da177e2005-04-16 15:20:36 -0700269
Daniel Lezcano854d8362007-09-25 19:18:04 -0700270out_free_netdev:
271 free_netdev(dev);
Pavel Emelyanov9d6dda322007-10-15 12:55:33 -0700272out:
273 if (net == &init_net)
274 panic("loopback: Failed to register netdevice: %d\n", err);
275 return err;
Daniel Lezcano854d8362007-09-25 19:18:04 -0700276}
Adrian Bunk60903f22007-01-02 00:35:48 -0800277
Pavel Emelyanov46650792007-10-08 20:38:39 -0700278static __net_exit void loopback_net_exit(struct net *net)
Eric W. Biederman2774c7a2007-09-26 22:10:56 -0700279{
280 struct net_device *dev = net->loopback_dev;
Daniel Lezcano854d8362007-09-25 19:18:04 -0700281
Eric W. Biederman2774c7a2007-09-26 22:10:56 -0700282 unregister_netdev(dev);
283}
284
Denis V. Lunev022cbae2007-11-13 03:23:50 -0800285static struct pernet_operations __net_initdata loopback_net_ops = {
Eric W. Biederman2774c7a2007-09-26 22:10:56 -0700286 .init = loopback_net_init,
287 .exit = loopback_net_exit,
288};
289
290static int __init loopback_init(void)
291{
292 return register_pernet_device(&loopback_net_ops);
293}
294
Denis V. Lunev070ac3a2007-09-27 12:04:19 -0700295/* Loopback is special. It should be initialized before any other network
296 * device and network subsystem.
297 */
Eric W. Biederman2774c7a2007-09-26 22:10:56 -0700298fs_initcall(loopback_init);