blob: 1895a4ca9c4f7878467afee19ab714f072c76d57 [file] [log] [blame]
Eric W. Biederman5f256be2007-09-12 11:50:50 +02001#include <linux/workqueue.h>
2#include <linux/rtnetlink.h>
3#include <linux/cache.h>
4#include <linux/slab.h>
5#include <linux/list.h>
6#include <linux/delay.h>
Eric W. Biederman9dd776b2007-09-26 22:04:26 -07007#include <linux/sched.h>
Pavel Emelyanovc93cf612008-04-15 00:35:23 -07008#include <linux/idr.h>
Eric W. Biederman5f256be2007-09-12 11:50:50 +02009#include <net/net_namespace.h>
Pavel Emelyanovdec827d2008-04-15 00:36:08 -070010#include <net/netns/generic.h>
Eric W. Biederman5f256be2007-09-12 11:50:50 +020011
12/*
13 * Our network namespace constructor/destructor lists
14 */
15
16static LIST_HEAD(pernet_list);
17static struct list_head *first_device = &pernet_list;
18static DEFINE_MUTEX(net_mutex);
19
Eric W. Biederman5f256be2007-09-12 11:50:50 +020020LIST_HEAD(net_namespace_list);
Alexey Dobriyanb76a4612008-10-08 11:35:06 +020021EXPORT_SYMBOL_GPL(net_namespace_list);
Eric W. Biederman5f256be2007-09-12 11:50:50 +020022
Eric W. Biederman5f256be2007-09-12 11:50:50 +020023struct net init_net;
Denis V. Lunevff4b9502008-01-22 22:05:33 -080024EXPORT_SYMBOL(init_net);
Eric W. Biederman5f256be2007-09-12 11:50:50 +020025
Pavel Emelyanovdec827d2008-04-15 00:36:08 -070026#define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */
27
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -070028/*
29 * setup_net runs the initializers for the network namespace object.
30 */
Pavel Emelyanov1a2ee932007-11-01 00:45:59 -070031static __net_init int setup_net(struct net *net)
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -070032{
33 /* Must be called with net_mutex held */
34 struct pernet_operations *ops;
35 int error;
Pavel Emelyanovdec827d2008-04-15 00:36:08 -070036 struct net_generic *ng;
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -070037
38 atomic_set(&net->count, 1);
Denis V. Lunev5d1e4462008-04-16 01:58:04 -070039#ifdef NETNS_REFCNT_DEBUG
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -070040 atomic_set(&net->use_count, 0);
Denis V. Lunev5d1e4462008-04-16 01:58:04 -070041#endif
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -070042
Pavel Emelyanovdec827d2008-04-15 00:36:08 -070043 error = -ENOMEM;
44 ng = kzalloc(sizeof(struct net_generic) +
45 INITIAL_NET_GEN_PTRS * sizeof(void *), GFP_KERNEL);
46 if (ng == NULL)
47 goto out;
48
49 ng->len = INITIAL_NET_GEN_PTRS;
50 INIT_RCU_HEAD(&ng->rcu);
51 rcu_assign_pointer(net->gen, ng);
52
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -070053 error = 0;
54 list_for_each_entry(ops, &pernet_list, list) {
55 if (ops->init) {
56 error = ops->init(net);
57 if (error < 0)
58 goto out_undo;
59 }
60 }
61out:
62 return error;
63
64out_undo:
65 /* Walk through the list backwards calling the exit functions
66 * for the pernet modules whose init functions did not fail.
67 */
68 list_for_each_entry_continue_reverse(ops, &pernet_list, list) {
69 if (ops->exit)
70 ops->exit(net);
71 }
72
73 rcu_barrier();
Pavel Emelyanovdec827d2008-04-15 00:36:08 -070074 kfree(ng);
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -070075 goto out;
76}
77
78#ifdef CONFIG_NET_NS
/* Slab cache from which net_alloc() takes every struct net */
static struct kmem_cache *net_cachep;
/* Workqueue on which __put_net() queues cleanup_net() */
static struct workqueue_struct *netns_wq;
Pavel Emelyanovd57a9212007-11-01 00:46:50 -070081
Eric W. Biederman5f256be2007-09-12 11:50:50 +020082static struct net *net_alloc(void)
83{
Pavel Emelyanov32f0c4c2007-10-09 13:02:17 -070084 return kmem_cache_zalloc(net_cachep, GFP_KERNEL);
Eric W. Biederman5f256be2007-09-12 11:50:50 +020085}
Eric W. Biederman5f256be2007-09-12 11:50:50 +020086
Johann Felix Soden45a19b02007-11-07 01:30:30 -080087static void net_free(struct net *net)
88{
89 if (!net)
90 return;
91
Denis V. Lunev5d1e4462008-04-16 01:58:04 -070092#ifdef NETNS_REFCNT_DEBUG
Johann Felix Soden45a19b02007-11-07 01:30:30 -080093 if (unlikely(atomic_read(&net->use_count) != 0)) {
94 printk(KERN_EMERG "network namespace not free! Usage: %d\n",
95 atomic_read(&net->use_count));
96 return;
97 }
Denis V. Lunev5d1e4462008-04-16 01:58:04 -070098#endif
Alexey Dobriyan4ef079c2008-10-14 22:54:48 -070099 kfree(net->gen);
Johann Felix Soden45a19b02007-11-07 01:30:30 -0800100 kmem_cache_free(net_cachep, net);
101}
102
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -0700103struct net *copy_net_ns(unsigned long flags, struct net *old_net)
104{
105 struct net *new_net = NULL;
106 int err;
107
108 get_net(old_net);
109
110 if (!(flags & CLONE_NEWNET))
111 return old_net;
112
113 err = -ENOMEM;
114 new_net = net_alloc();
115 if (!new_net)
116 goto out;
117
118 mutex_lock(&net_mutex);
119 err = setup_net(new_net);
120 if (err)
121 goto out_unlock;
122
123 rtnl_lock();
124 list_add_tail(&new_net->list, &net_namespace_list);
125 rtnl_unlock();
126
127
128out_unlock:
129 mutex_unlock(&net_mutex);
130out:
131 put_net(old_net);
132 if (err) {
133 net_free(new_net);
134 new_net = ERR_PTR(err);
135 }
136 return new_net;
137}
138
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200139static void cleanup_net(struct work_struct *work)
140{
141 struct pernet_operations *ops;
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200142 struct net *net;
143
Eric W. Biedermanb9f75f42008-06-20 22:16:51 -0700144 /* Be very certain incoming network packets will not find us */
145 rcu_barrier();
146
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200147 net = container_of(work, struct net, work);
148
149 mutex_lock(&net_mutex);
150
151 /* Don't let anyone else find us. */
Eric W. Biedermanf4618d32007-09-26 22:40:08 -0700152 rtnl_lock();
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200153 list_del(&net->list);
Eric W. Biedermanf4618d32007-09-26 22:40:08 -0700154 rtnl_unlock();
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200155
156 /* Run all of the network namespace exit methods */
Pavel Emelyanov768f35912007-09-18 13:20:41 -0700157 list_for_each_entry_reverse(ops, &pernet_list, list) {
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200158 if (ops->exit)
159 ops->exit(net);
160 }
161
162 mutex_unlock(&net_mutex);
163
164 /* Ensure there are no outstanding rcu callbacks using this
165 * network namespace.
166 */
167 rcu_barrier();
168
169 /* Finally it is safe to free my network namespace structure */
170 net_free(net);
171}
172
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200173void __put_net(struct net *net)
174{
175 /* Cleanup the network namespace in process context */
176 INIT_WORK(&net->work, cleanup_net);
Benjamin Thery3ef13552007-11-19 23:18:16 -0800177 queue_work(netns_wq, &net->work);
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200178}
179EXPORT_SYMBOL_GPL(__put_net);
180
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -0700181#else
Eric W. Biederman9dd776b2007-09-26 22:04:26 -0700182struct net *copy_net_ns(unsigned long flags, struct net *old_net)
183{
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -0700184 if (flags & CLONE_NEWNET)
185 return ERR_PTR(-EINVAL);
186 return old_net;
Eric W. Biederman9dd776b2007-09-26 22:04:26 -0700187}
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -0700188#endif
Eric W. Biederman9dd776b2007-09-26 22:04:26 -0700189
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200190static int __init net_ns_init(void)
191{
192 int err;
193
194 printk(KERN_INFO "net_namespace: %zd bytes\n", sizeof(struct net));
Pavel Emelyanovd57a9212007-11-01 00:46:50 -0700195#ifdef CONFIG_NET_NS
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200196 net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
197 SMP_CACHE_BYTES,
198 SLAB_PANIC, NULL);
Benjamin Thery3ef13552007-11-19 23:18:16 -0800199
200 /* Create workqueue for cleanup */
201 netns_wq = create_singlethread_workqueue("netns");
202 if (!netns_wq)
203 panic("Could not create netns workq");
Pavel Emelyanovd57a9212007-11-01 00:46:50 -0700204#endif
Benjamin Thery3ef13552007-11-19 23:18:16 -0800205
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200206 mutex_lock(&net_mutex);
207 err = setup_net(&init_net);
208
Eric W. Biedermanf4618d32007-09-26 22:40:08 -0700209 rtnl_lock();
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200210 list_add_tail(&init_net.list, &net_namespace_list);
Eric W. Biedermanf4618d32007-09-26 22:40:08 -0700211 rtnl_unlock();
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200212
213 mutex_unlock(&net_mutex);
214 if (err)
215 panic("Could not setup the initial network namespace");
216
217 return 0;
218}
219
220pure_initcall(net_ns_init);
221
Denis V. Luneved160e82007-11-13 03:23:21 -0800222#ifdef CONFIG_NET_NS
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200223static int register_pernet_operations(struct list_head *list,
224 struct pernet_operations *ops)
225{
226 struct net *net, *undo_net;
227 int error;
228
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200229 list_add_tail(&ops->list, list);
Pavel Emelyanov1dba3232007-11-01 00:42:43 -0700230 if (ops->init) {
231 for_each_net(net) {
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200232 error = ops->init(net);
233 if (error)
234 goto out_undo;
235 }
236 }
Pavel Emelyanov1dba3232007-11-01 00:42:43 -0700237 return 0;
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200238
239out_undo:
240 /* If I have an error cleanup all namespaces I initialized */
241 list_del(&ops->list);
Pavel Emelyanov1dba3232007-11-01 00:42:43 -0700242 if (ops->exit) {
243 for_each_net(undo_net) {
244 if (undo_net == net)
245 goto undone;
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200246 ops->exit(undo_net);
Pavel Emelyanov1dba3232007-11-01 00:42:43 -0700247 }
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200248 }
249undone:
Pavel Emelyanov1dba3232007-11-01 00:42:43 -0700250 return error;
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200251}
252
253static void unregister_pernet_operations(struct pernet_operations *ops)
254{
255 struct net *net;
256
257 list_del(&ops->list);
Pavel Emelyanov1dba3232007-11-01 00:42:43 -0700258 if (ops->exit)
259 for_each_net(net)
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200260 ops->exit(net);
261}
262
Denis V. Luneved160e82007-11-13 03:23:21 -0800263#else
264
265static int register_pernet_operations(struct list_head *list,
266 struct pernet_operations *ops)
267{
268 if (ops->init == NULL)
269 return 0;
270 return ops->init(&init_net);
271}
272
273static void unregister_pernet_operations(struct pernet_operations *ops)
274{
275 if (ops->exit)
276 ops->exit(&init_net);
277}
278#endif
279
/* Source of the ids handed out by the register_pernet_gen_*() functions */
static DEFINE_IDA(net_generic_ids);
281
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200282/**
283 * register_pernet_subsys - register a network namespace subsystem
284 * @ops: pernet operations structure for the subsystem
285 *
286 * Register a subsystem which has init and exit functions
287 * that are called when network namespaces are created and
288 * destroyed respectively.
289 *
290 * When registered all network namespace init functions are
291 * called for every existing network namespace. Allowing kernel
292 * modules to have a race free view of the set of network namespaces.
293 *
294 * When a new network namespace is created all of the init
295 * methods are called in the order in which they were registered.
296 *
297 * When a network namespace is destroyed all of the exit methods
298 * are called in the reverse of the order with which they were
299 * registered.
300 */
301int register_pernet_subsys(struct pernet_operations *ops)
302{
303 int error;
304 mutex_lock(&net_mutex);
305 error = register_pernet_operations(first_device, ops);
306 mutex_unlock(&net_mutex);
307 return error;
308}
309EXPORT_SYMBOL_GPL(register_pernet_subsys);
310
311/**
312 * unregister_pernet_subsys - unregister a network namespace subsystem
313 * @ops: pernet operations structure to manipulate
314 *
315 * Remove the pernet operations structure from the list to be
Oliver Pinter53379e52008-02-03 17:56:48 +0200316 * used when network namespaces are created or destroyed. In
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200317 * addition run the exit method for all existing network
318 * namespaces.
319 */
320void unregister_pernet_subsys(struct pernet_operations *module)
321{
322 mutex_lock(&net_mutex);
323 unregister_pernet_operations(module);
324 mutex_unlock(&net_mutex);
325}
326EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
327
Alexey Dobriyan485ac572008-10-30 23:55:16 -0700328int register_pernet_gen_subsys(int *id, struct pernet_operations *ops)
329{
330 int rv;
331
332 mutex_lock(&net_mutex);
333again:
334 rv = ida_get_new_above(&net_generic_ids, 1, id);
335 if (rv < 0) {
336 if (rv == -EAGAIN) {
337 ida_pre_get(&net_generic_ids, GFP_KERNEL);
338 goto again;
339 }
340 goto out;
341 }
342 rv = register_pernet_operations(first_device, ops);
343 if (rv < 0)
344 ida_remove(&net_generic_ids, *id);
345 mutex_unlock(&net_mutex);
346out:
347 return rv;
348}
349EXPORT_SYMBOL_GPL(register_pernet_gen_subsys);
350
351void unregister_pernet_gen_subsys(int id, struct pernet_operations *ops)
352{
353 mutex_lock(&net_mutex);
354 unregister_pernet_operations(ops);
355 ida_remove(&net_generic_ids, id);
356 mutex_unlock(&net_mutex);
357}
358EXPORT_SYMBOL_GPL(unregister_pernet_gen_subsys);
359
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200360/**
361 * register_pernet_device - register a network namespace device
362 * @ops: pernet operations structure for the subsystem
363 *
364 * Register a device which has init and exit functions
365 * that are called when network namespaces are created and
366 * destroyed respectively.
367 *
368 * When registered all network namespace init functions are
369 * called for every existing network namespace. Allowing kernel
370 * modules to have a race free view of the set of network namespaces.
371 *
372 * When a new network namespace is created all of the init
373 * methods are called in the order in which they were registered.
374 *
375 * When a network namespace is destroyed all of the exit methods
376 * are called in the reverse of the order with which they were
377 * registered.
378 */
379int register_pernet_device(struct pernet_operations *ops)
380{
381 int error;
382 mutex_lock(&net_mutex);
383 error = register_pernet_operations(&pernet_list, ops);
384 if (!error && (first_device == &pernet_list))
385 first_device = &ops->list;
386 mutex_unlock(&net_mutex);
387 return error;
388}
389EXPORT_SYMBOL_GPL(register_pernet_device);
390
Pavel Emelyanovc93cf612008-04-15 00:35:23 -0700391int register_pernet_gen_device(int *id, struct pernet_operations *ops)
392{
393 int error;
394 mutex_lock(&net_mutex);
395again:
396 error = ida_get_new_above(&net_generic_ids, 1, id);
397 if (error) {
398 if (error == -EAGAIN) {
399 ida_pre_get(&net_generic_ids, GFP_KERNEL);
400 goto again;
401 }
402 goto out;
403 }
404 error = register_pernet_operations(&pernet_list, ops);
405 if (error)
406 ida_remove(&net_generic_ids, *id);
407 else if (first_device == &pernet_list)
408 first_device = &ops->list;
409out:
410 mutex_unlock(&net_mutex);
411 return error;
412}
413EXPORT_SYMBOL_GPL(register_pernet_gen_device);
414
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200415/**
416 * unregister_pernet_device - unregister a network namespace netdevice
417 * @ops: pernet operations structure to manipulate
418 *
419 * Remove the pernet operations structure from the list to be
Oliver Pinter53379e52008-02-03 17:56:48 +0200420 * used when network namespaces are created or destroyed. In
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200421 * addition run the exit method for all existing network
422 * namespaces.
423 */
424void unregister_pernet_device(struct pernet_operations *ops)
425{
426 mutex_lock(&net_mutex);
427 if (&ops->list == first_device)
428 first_device = first_device->next;
429 unregister_pernet_operations(ops);
430 mutex_unlock(&net_mutex);
431}
432EXPORT_SYMBOL_GPL(unregister_pernet_device);
Pavel Emelyanovc93cf612008-04-15 00:35:23 -0700433
434void unregister_pernet_gen_device(int id, struct pernet_operations *ops)
435{
436 mutex_lock(&net_mutex);
437 if (&ops->list == first_device)
438 first_device = first_device->next;
439 unregister_pernet_operations(ops);
440 ida_remove(&net_generic_ids, id);
441 mutex_unlock(&net_mutex);
442}
443EXPORT_SYMBOL_GPL(unregister_pernet_gen_device);
Pavel Emelyanovdec827d2008-04-15 00:36:08 -0700444
445static void net_generic_release(struct rcu_head *rcu)
446{
447 struct net_generic *ng;
448
449 ng = container_of(rcu, struct net_generic, rcu);
450 kfree(ng);
451}
452
453int net_assign_generic(struct net *net, int id, void *data)
454{
455 struct net_generic *ng, *old_ng;
456
457 BUG_ON(!mutex_is_locked(&net_mutex));
458 BUG_ON(id == 0);
459
460 ng = old_ng = net->gen;
461 if (old_ng->len >= id)
462 goto assign;
463
464 ng = kzalloc(sizeof(struct net_generic) +
465 id * sizeof(void *), GFP_KERNEL);
466 if (ng == NULL)
467 return -ENOMEM;
468
469 /*
470 * Some synchronisation notes:
471 *
472 * The net_generic explores the net->gen array inside rcu
473 * read section. Besides once set the net->gen->ptr[x]
474 * pointer never changes (see rules in netns/generic.h).
475 *
476 * That said, we simply duplicate this array and schedule
477 * the old copy for kfree after a grace period.
478 */
479
480 ng->len = id;
481 INIT_RCU_HEAD(&ng->rcu);
482 memcpy(&ng->ptr, &old_ng->ptr, old_ng->len);
483
484 rcu_assign_pointer(net->gen, ng);
485 call_rcu(&old_ng->rcu, net_generic_release);
486assign:
487 ng->ptr[id - 1] = data;
488 return 0;
489}
490EXPORT_SYMBOL_GPL(net_assign_generic);