blob: b0767abf23e57f32e73d54ca2b44ebdeec1d5f11 [file] [log] [blame]
Eric W. Biederman5f256be2007-09-12 11:50:50 +02001#include <linux/workqueue.h>
2#include <linux/rtnetlink.h>
3#include <linux/cache.h>
4#include <linux/slab.h>
5#include <linux/list.h>
6#include <linux/delay.h>
Eric W. Biederman9dd776b2007-09-26 22:04:26 -07007#include <linux/sched.h>
Pavel Emelyanovc93cf612008-04-15 00:35:23 -07008#include <linux/idr.h>
Eric W. Biederman5f256be2007-09-12 11:50:50 +02009#include <net/net_namespace.h>
Pavel Emelyanovdec827d2008-04-15 00:36:08 -070010#include <net/netns/generic.h>
Eric W. Biederman5f256be2007-09-12 11:50:50 +020011
12/*
13 * Our network namespace constructor/destructor lists
14 */
15
16static LIST_HEAD(pernet_list);
17static struct list_head *first_device = &pernet_list;
18static DEFINE_MUTEX(net_mutex);
19
Eric W. Biederman5f256be2007-09-12 11:50:50 +020020LIST_HEAD(net_namespace_list);
Alexey Dobriyanb76a4612008-10-08 11:35:06 +020021EXPORT_SYMBOL_GPL(net_namespace_list);
Eric W. Biederman5f256be2007-09-12 11:50:50 +020022
Eric W. Biederman5f256be2007-09-12 11:50:50 +020023struct net init_net;
Denis V. Lunevff4b9502008-01-22 22:05:33 -080024EXPORT_SYMBOL(init_net);
Eric W. Biederman5f256be2007-09-12 11:50:50 +020025
Pavel Emelyanovdec827d2008-04-15 00:36:08 -070026#define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */
27
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -070028/*
29 * setup_net runs the initializers for the network namespace object.
30 */
Pavel Emelyanov1a2ee932007-11-01 00:45:59 -070031static __net_init int setup_net(struct net *net)
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -070032{
33 /* Must be called with net_mutex held */
34 struct pernet_operations *ops;
Daniel Lezcano486a87f2009-02-22 00:07:53 -080035 int error = 0;
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -070036
37 atomic_set(&net->count, 1);
Daniel Lezcano486a87f2009-02-22 00:07:53 -080038
Denis V. Lunev5d1e4462008-04-16 01:58:04 -070039#ifdef NETNS_REFCNT_DEBUG
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -070040 atomic_set(&net->use_count, 0);
Denis V. Lunev5d1e4462008-04-16 01:58:04 -070041#endif
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -070042
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -070043 list_for_each_entry(ops, &pernet_list, list) {
44 if (ops->init) {
45 error = ops->init(net);
46 if (error < 0)
47 goto out_undo;
48 }
49 }
50out:
51 return error;
52
53out_undo:
54 /* Walk through the list backwards calling the exit functions
55 * for the pernet modules whose init functions did not fail.
56 */
57 list_for_each_entry_continue_reverse(ops, &pernet_list, list) {
58 if (ops->exit)
59 ops->exit(net);
60 }
61
62 rcu_barrier();
63 goto out;
64}
65
66#ifdef CONFIG_NET_NS
Pavel Emelyanovd57a9212007-11-01 00:46:50 -070067static struct kmem_cache *net_cachep;
Benjamin Thery3ef13552007-11-19 23:18:16 -080068static struct workqueue_struct *netns_wq;
Pavel Emelyanovd57a9212007-11-01 00:46:50 -070069
Daniel Lezcano486a87f2009-02-22 00:07:53 -080070static struct net_generic *net_alloc_generic(void)
71{
72 struct net_generic *ng;
73 size_t generic_size = sizeof(struct net_generic) +
74 INITIAL_NET_GEN_PTRS * sizeof(void *);
75
76 ng = kzalloc(generic_size, GFP_KERNEL);
77 if (ng)
78 ng->len = INITIAL_NET_GEN_PTRS;
79
80 return ng;
81}
82
Eric W. Biederman5f256be2007-09-12 11:50:50 +020083static struct net *net_alloc(void)
84{
Daniel Lezcano486a87f2009-02-22 00:07:53 -080085 struct net *net = NULL;
86 struct net_generic *ng;
87
88 ng = net_alloc_generic();
89 if (!ng)
90 goto out;
91
92 net = kmem_cache_zalloc(net_cachep, GFP_KERNEL);
93 if (!net)
94 goto out_free;
95
96 rcu_assign_pointer(net->gen, ng);
97out:
98 return net;
99
100out_free:
101 kfree(ng);
102 goto out;
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200103}
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200104
Johann Felix Soden45a19b02007-11-07 01:30:30 -0800105static void net_free(struct net *net)
106{
Denis V. Lunev5d1e4462008-04-16 01:58:04 -0700107#ifdef NETNS_REFCNT_DEBUG
Johann Felix Soden45a19b02007-11-07 01:30:30 -0800108 if (unlikely(atomic_read(&net->use_count) != 0)) {
109 printk(KERN_EMERG "network namespace not free! Usage: %d\n",
110 atomic_read(&net->use_count));
111 return;
112 }
Denis V. Lunev5d1e4462008-04-16 01:58:04 -0700113#endif
Alexey Dobriyan4ef079c2008-10-14 22:54:48 -0700114 kfree(net->gen);
Johann Felix Soden45a19b02007-11-07 01:30:30 -0800115 kmem_cache_free(net_cachep, net);
116}
117
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -0700118struct net *copy_net_ns(unsigned long flags, struct net *old_net)
119{
120 struct net *new_net = NULL;
121 int err;
122
123 get_net(old_net);
124
125 if (!(flags & CLONE_NEWNET))
126 return old_net;
127
128 err = -ENOMEM;
129 new_net = net_alloc();
130 if (!new_net)
Daniel Lezcano486a87f2009-02-22 00:07:53 -0800131 goto out_err;
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -0700132
133 mutex_lock(&net_mutex);
134 err = setup_net(new_net);
Daniel Lezcano486a87f2009-02-22 00:07:53 -0800135 if (!err) {
136 rtnl_lock();
137 list_add_tail(&new_net->list, &net_namespace_list);
138 rtnl_unlock();
139 }
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -0700140 mutex_unlock(&net_mutex);
Daniel Lezcano486a87f2009-02-22 00:07:53 -0800141
142 if (err)
143 goto out_free;
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -0700144out:
145 put_net(old_net);
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -0700146 return new_net;
Daniel Lezcano486a87f2009-02-22 00:07:53 -0800147
148out_free:
149 net_free(new_net);
150out_err:
151 new_net = ERR_PTR(err);
152 goto out;
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -0700153}
154
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200155static void cleanup_net(struct work_struct *work)
156{
157 struct pernet_operations *ops;
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200158 struct net *net;
159
Eric W. Biedermanb9f75f42008-06-20 22:16:51 -0700160 /* Be very certain incoming network packets will not find us */
161 rcu_barrier();
162
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200163 net = container_of(work, struct net, work);
164
165 mutex_lock(&net_mutex);
166
167 /* Don't let anyone else find us. */
Eric W. Biedermanf4618d32007-09-26 22:40:08 -0700168 rtnl_lock();
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200169 list_del(&net->list);
Eric W. Biedermanf4618d32007-09-26 22:40:08 -0700170 rtnl_unlock();
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200171
172 /* Run all of the network namespace exit methods */
Pavel Emelyanov768f35912007-09-18 13:20:41 -0700173 list_for_each_entry_reverse(ops, &pernet_list, list) {
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200174 if (ops->exit)
175 ops->exit(net);
176 }
177
178 mutex_unlock(&net_mutex);
179
180 /* Ensure there are no outstanding rcu callbacks using this
181 * network namespace.
182 */
183 rcu_barrier();
184
185 /* Finally it is safe to free my network namespace structure */
186 net_free(net);
187}
188
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200189void __put_net(struct net *net)
190{
191 /* Cleanup the network namespace in process context */
192 INIT_WORK(&net->work, cleanup_net);
Benjamin Thery3ef13552007-11-19 23:18:16 -0800193 queue_work(netns_wq, &net->work);
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200194}
195EXPORT_SYMBOL_GPL(__put_net);
196
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -0700197#else
Eric W. Biederman9dd776b2007-09-26 22:04:26 -0700198struct net *copy_net_ns(unsigned long flags, struct net *old_net)
199{
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -0700200 if (flags & CLONE_NEWNET)
201 return ERR_PTR(-EINVAL);
202 return old_net;
Eric W. Biederman9dd776b2007-09-26 22:04:26 -0700203}
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -0700204#endif
Eric W. Biederman9dd776b2007-09-26 22:04:26 -0700205
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200206static int __init net_ns_init(void)
207{
Daniel Lezcano486a87f2009-02-22 00:07:53 -0800208 struct net_generic *ng;
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200209 int err;
210
211 printk(KERN_INFO "net_namespace: %zd bytes\n", sizeof(struct net));
Pavel Emelyanovd57a9212007-11-01 00:46:50 -0700212#ifdef CONFIG_NET_NS
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200213 net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
214 SMP_CACHE_BYTES,
215 SLAB_PANIC, NULL);
Benjamin Thery3ef13552007-11-19 23:18:16 -0800216
217 /* Create workqueue for cleanup */
218 netns_wq = create_singlethread_workqueue("netns");
219 if (!netns_wq)
220 panic("Could not create netns workq");
Pavel Emelyanovd57a9212007-11-01 00:46:50 -0700221#endif
Benjamin Thery3ef13552007-11-19 23:18:16 -0800222
Daniel Lezcano486a87f2009-02-22 00:07:53 -0800223 ng = net_alloc_generic();
224 if (!ng)
225 panic("Could not allocate generic netns");
226
227 rcu_assign_pointer(init_net.gen, ng);
228
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200229 mutex_lock(&net_mutex);
230 err = setup_net(&init_net);
231
Eric W. Biedermanf4618d32007-09-26 22:40:08 -0700232 rtnl_lock();
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200233 list_add_tail(&init_net.list, &net_namespace_list);
Eric W. Biedermanf4618d32007-09-26 22:40:08 -0700234 rtnl_unlock();
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200235
236 mutex_unlock(&net_mutex);
237 if (err)
238 panic("Could not setup the initial network namespace");
239
240 return 0;
241}
242
243pure_initcall(net_ns_init);
244
Denis V. Luneved160e82007-11-13 03:23:21 -0800245#ifdef CONFIG_NET_NS
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200246static int register_pernet_operations(struct list_head *list,
247 struct pernet_operations *ops)
248{
249 struct net *net, *undo_net;
250 int error;
251
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200252 list_add_tail(&ops->list, list);
Pavel Emelyanov1dba3232007-11-01 00:42:43 -0700253 if (ops->init) {
254 for_each_net(net) {
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200255 error = ops->init(net);
256 if (error)
257 goto out_undo;
258 }
259 }
Pavel Emelyanov1dba3232007-11-01 00:42:43 -0700260 return 0;
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200261
262out_undo:
263 /* If I have an error cleanup all namespaces I initialized */
264 list_del(&ops->list);
Pavel Emelyanov1dba3232007-11-01 00:42:43 -0700265 if (ops->exit) {
266 for_each_net(undo_net) {
267 if (undo_net == net)
268 goto undone;
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200269 ops->exit(undo_net);
Pavel Emelyanov1dba3232007-11-01 00:42:43 -0700270 }
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200271 }
272undone:
Pavel Emelyanov1dba3232007-11-01 00:42:43 -0700273 return error;
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200274}
275
276static void unregister_pernet_operations(struct pernet_operations *ops)
277{
278 struct net *net;
279
280 list_del(&ops->list);
Pavel Emelyanov1dba3232007-11-01 00:42:43 -0700281 if (ops->exit)
282 for_each_net(net)
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200283 ops->exit(net);
284}
285
Denis V. Luneved160e82007-11-13 03:23:21 -0800286#else
287
288static int register_pernet_operations(struct list_head *list,
289 struct pernet_operations *ops)
290{
291 if (ops->init == NULL)
292 return 0;
293 return ops->init(&init_net);
294}
295
296static void unregister_pernet_operations(struct pernet_operations *ops)
297{
298 if (ops->exit)
299 ops->exit(&init_net);
300}
301#endif
302
/* Allocator for the ids handed out by the register_pernet_gen_*() calls. */
static DEFINE_IDA(net_generic_ids);
304
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200305/**
306 * register_pernet_subsys - register a network namespace subsystem
307 * @ops: pernet operations structure for the subsystem
308 *
309 * Register a subsystem which has init and exit functions
310 * that are called when network namespaces are created and
311 * destroyed respectively.
312 *
313 * When registered all network namespace init functions are
314 * called for every existing network namespace. Allowing kernel
315 * modules to have a race free view of the set of network namespaces.
316 *
317 * When a new network namespace is created all of the init
318 * methods are called in the order in which they were registered.
319 *
320 * When a network namespace is destroyed all of the exit methods
321 * are called in the reverse of the order with which they were
322 * registered.
323 */
324int register_pernet_subsys(struct pernet_operations *ops)
325{
326 int error;
327 mutex_lock(&net_mutex);
328 error = register_pernet_operations(first_device, ops);
329 mutex_unlock(&net_mutex);
330 return error;
331}
332EXPORT_SYMBOL_GPL(register_pernet_subsys);
333
334/**
335 * unregister_pernet_subsys - unregister a network namespace subsystem
336 * @ops: pernet operations structure to manipulate
337 *
338 * Remove the pernet operations structure from the list to be
Oliver Pinter53379e52008-02-03 17:56:48 +0200339 * used when network namespaces are created or destroyed. In
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200340 * addition run the exit method for all existing network
341 * namespaces.
342 */
343void unregister_pernet_subsys(struct pernet_operations *module)
344{
345 mutex_lock(&net_mutex);
346 unregister_pernet_operations(module);
347 mutex_unlock(&net_mutex);
348}
349EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
350
Alexey Dobriyan485ac572008-10-30 23:55:16 -0700351int register_pernet_gen_subsys(int *id, struct pernet_operations *ops)
352{
353 int rv;
354
355 mutex_lock(&net_mutex);
356again:
357 rv = ida_get_new_above(&net_generic_ids, 1, id);
358 if (rv < 0) {
359 if (rv == -EAGAIN) {
360 ida_pre_get(&net_generic_ids, GFP_KERNEL);
361 goto again;
362 }
363 goto out;
364 }
365 rv = register_pernet_operations(first_device, ops);
366 if (rv < 0)
367 ida_remove(&net_generic_ids, *id);
Alexey Dobriyan485ac572008-10-30 23:55:16 -0700368out:
Jiri Slaby357f5b02009-01-17 06:47:12 +0000369 mutex_unlock(&net_mutex);
Alexey Dobriyan485ac572008-10-30 23:55:16 -0700370 return rv;
371}
372EXPORT_SYMBOL_GPL(register_pernet_gen_subsys);
373
374void unregister_pernet_gen_subsys(int id, struct pernet_operations *ops)
375{
376 mutex_lock(&net_mutex);
377 unregister_pernet_operations(ops);
378 ida_remove(&net_generic_ids, id);
379 mutex_unlock(&net_mutex);
380}
381EXPORT_SYMBOL_GPL(unregister_pernet_gen_subsys);
382
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200383/**
384 * register_pernet_device - register a network namespace device
385 * @ops: pernet operations structure for the subsystem
386 *
387 * Register a device which has init and exit functions
388 * that are called when network namespaces are created and
389 * destroyed respectively.
390 *
391 * When registered all network namespace init functions are
392 * called for every existing network namespace. Allowing kernel
393 * modules to have a race free view of the set of network namespaces.
394 *
395 * When a new network namespace is created all of the init
396 * methods are called in the order in which they were registered.
397 *
398 * When a network namespace is destroyed all of the exit methods
399 * are called in the reverse of the order with which they were
400 * registered.
401 */
402int register_pernet_device(struct pernet_operations *ops)
403{
404 int error;
405 mutex_lock(&net_mutex);
406 error = register_pernet_operations(&pernet_list, ops);
407 if (!error && (first_device == &pernet_list))
408 first_device = &ops->list;
409 mutex_unlock(&net_mutex);
410 return error;
411}
412EXPORT_SYMBOL_GPL(register_pernet_device);
413
Pavel Emelyanovc93cf612008-04-15 00:35:23 -0700414int register_pernet_gen_device(int *id, struct pernet_operations *ops)
415{
416 int error;
417 mutex_lock(&net_mutex);
418again:
419 error = ida_get_new_above(&net_generic_ids, 1, id);
420 if (error) {
421 if (error == -EAGAIN) {
422 ida_pre_get(&net_generic_ids, GFP_KERNEL);
423 goto again;
424 }
425 goto out;
426 }
427 error = register_pernet_operations(&pernet_list, ops);
428 if (error)
429 ida_remove(&net_generic_ids, *id);
430 else if (first_device == &pernet_list)
431 first_device = &ops->list;
432out:
433 mutex_unlock(&net_mutex);
434 return error;
435}
436EXPORT_SYMBOL_GPL(register_pernet_gen_device);
437
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200438/**
439 * unregister_pernet_device - unregister a network namespace netdevice
440 * @ops: pernet operations structure to manipulate
441 *
442 * Remove the pernet operations structure from the list to be
Oliver Pinter53379e52008-02-03 17:56:48 +0200443 * used when network namespaces are created or destroyed. In
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200444 * addition run the exit method for all existing network
445 * namespaces.
446 */
447void unregister_pernet_device(struct pernet_operations *ops)
448{
449 mutex_lock(&net_mutex);
450 if (&ops->list == first_device)
451 first_device = first_device->next;
452 unregister_pernet_operations(ops);
453 mutex_unlock(&net_mutex);
454}
455EXPORT_SYMBOL_GPL(unregister_pernet_device);
Pavel Emelyanovc93cf612008-04-15 00:35:23 -0700456
457void unregister_pernet_gen_device(int id, struct pernet_operations *ops)
458{
459 mutex_lock(&net_mutex);
460 if (&ops->list == first_device)
461 first_device = first_device->next;
462 unregister_pernet_operations(ops);
463 ida_remove(&net_generic_ids, id);
464 mutex_unlock(&net_mutex);
465}
466EXPORT_SYMBOL_GPL(unregister_pernet_gen_device);
Pavel Emelyanovdec827d2008-04-15 00:36:08 -0700467
468static void net_generic_release(struct rcu_head *rcu)
469{
470 struct net_generic *ng;
471
472 ng = container_of(rcu, struct net_generic, rcu);
473 kfree(ng);
474}
475
476int net_assign_generic(struct net *net, int id, void *data)
477{
478 struct net_generic *ng, *old_ng;
479
480 BUG_ON(!mutex_is_locked(&net_mutex));
481 BUG_ON(id == 0);
482
483 ng = old_ng = net->gen;
484 if (old_ng->len >= id)
485 goto assign;
486
487 ng = kzalloc(sizeof(struct net_generic) +
488 id * sizeof(void *), GFP_KERNEL);
489 if (ng == NULL)
490 return -ENOMEM;
491
492 /*
493 * Some synchronisation notes:
494 *
495 * The net_generic explores the net->gen array inside rcu
496 * read section. Besides once set the net->gen->ptr[x]
497 * pointer never changes (see rules in netns/generic.h).
498 *
499 * That said, we simply duplicate this array and schedule
500 * the old copy for kfree after a grace period.
501 */
502
503 ng->len = id;
Pavel Emelyanovdec827d2008-04-15 00:36:08 -0700504 memcpy(&ng->ptr, &old_ng->ptr, old_ng->len);
505
506 rcu_assign_pointer(net->gen, ng);
507 call_rcu(&old_ng->rcu, net_generic_release);
508assign:
509 ng->ptr[id - 1] = data;
510 return 0;
511}
512EXPORT_SYMBOL_GPL(net_assign_generic);