blob: 763674e1e593082e7cdcf694af7301ba1a34ecfc [file] [log] [blame]
Eric W. Biederman5f256be2007-09-12 11:50:50 +02001#include <linux/workqueue.h>
2#include <linux/rtnetlink.h>
3#include <linux/cache.h>
4#include <linux/slab.h>
5#include <linux/list.h>
6#include <linux/delay.h>
Eric W. Biederman9dd776b2007-09-26 22:04:26 -07007#include <linux/sched.h>
Pavel Emelyanovc93cf612008-04-15 00:35:23 -07008#include <linux/idr.h>
Eric W. Biederman5f256be2007-09-12 11:50:50 +02009#include <net/net_namespace.h>
Pavel Emelyanovdec827d2008-04-15 00:36:08 -070010#include <net/netns/generic.h>
Eric W. Biederman5f256be2007-09-12 11:50:50 +020011
12/*
13 * Our network namespace constructor/destructor lists
14 */
15
/*
 * pernet_list links every registered pernet_operations.
 * first_device is the insertion point for subsystems: they are added
 * in front of it, while devices are appended to the tail of the list,
 * so subsystems always precede devices.
 * net_mutex is held whenever the list is walked or modified.
 */
16static LIST_HEAD(pernet_list);
17static struct list_head *first_device = &pernet_list;
18static DEFINE_MUTEX(net_mutex);
19
/*
 * List of network namespaces.  Within this file entries are added and
 * removed with rtnl_lock held.
 */
Eric W. Biederman5f256be2007-09-12 11:50:50 +020020LIST_HEAD(net_namespace_list);
21
/* The initial network namespace; it is set up by net_ns_init(). */
Eric W. Biederman5f256be2007-09-12 11:50:50 +020022struct net init_net;
Denis V. Lunevff4b9502008-01-22 22:05:33 -080023EXPORT_SYMBOL(init_net);
Eric W. Biederman5f256be2007-09-12 11:50:50 +020024
/*
 * Number of pointer slots in the generic array that setup_net()
 * allocates for a namespace.
 */
Pavel Emelyanovdec827d2008-04-15 00:36:08 -070025#define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */
26
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -070027/*
28 * setup_net runs the initializers for the network namespace object.
29 */
Pavel Emelyanov1a2ee932007-11-01 00:45:59 -070030static __net_init int setup_net(struct net *net)
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -070031{
32 /* Must be called with net_mutex held */
33 struct pernet_operations *ops;
34 int error;
Pavel Emelyanovdec827d2008-04-15 00:36:08 -070035 struct net_generic *ng;
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -070036
37 atomic_set(&net->count, 1);
38 atomic_set(&net->use_count, 0);
39
Pavel Emelyanovdec827d2008-04-15 00:36:08 -070040 error = -ENOMEM;
41 ng = kzalloc(sizeof(struct net_generic) +
42 INITIAL_NET_GEN_PTRS * sizeof(void *), GFP_KERNEL);
43 if (ng == NULL)
44 goto out;
45
46 ng->len = INITIAL_NET_GEN_PTRS;
47 INIT_RCU_HEAD(&ng->rcu);
48 rcu_assign_pointer(net->gen, ng);
49
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -070050 error = 0;
51 list_for_each_entry(ops, &pernet_list, list) {
52 if (ops->init) {
53 error = ops->init(net);
54 if (error < 0)
55 goto out_undo;
56 }
57 }
58out:
59 return error;
60
61out_undo:
62 /* Walk through the list backwards calling the exit functions
63 * for the pernet modules whose init functions did not fail.
64 */
65 list_for_each_entry_continue_reverse(ops, &pernet_list, list) {
66 if (ops->exit)
67 ops->exit(net);
68 }
69
70 rcu_barrier();
Pavel Emelyanovdec827d2008-04-15 00:36:08 -070071 kfree(ng);
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -070072 goto out;
73}
74
75#ifdef CONFIG_NET_NS
/* Slab cache for struct net objects; created in net_ns_init(). */
Pavel Emelyanovd57a9212007-11-01 00:46:50 -070076static struct kmem_cache *net_cachep;
/* Single-threaded workqueue on which __put_net() queues cleanup_net(). */
Benjamin Thery3ef13552007-11-19 23:18:16 -080077static struct workqueue_struct *netns_wq;
Pavel Emelyanovd57a9212007-11-01 00:46:50 -070078
Eric W. Biederman5f256be2007-09-12 11:50:50 +020079static struct net *net_alloc(void)
80{
Pavel Emelyanov32f0c4c2007-10-09 13:02:17 -070081 return kmem_cache_zalloc(net_cachep, GFP_KERNEL);
Eric W. Biederman5f256be2007-09-12 11:50:50 +020082}
Eric W. Biederman5f256be2007-09-12 11:50:50 +020083
Johann Felix Soden45a19b02007-11-07 01:30:30 -080084static void net_free(struct net *net)
85{
86 if (!net)
87 return;
88
89 if (unlikely(atomic_read(&net->use_count) != 0)) {
90 printk(KERN_EMERG "network namespace not free! Usage: %d\n",
91 atomic_read(&net->use_count));
92 return;
93 }
94
95 kmem_cache_free(net_cachep, net);
96}
97
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -070098struct net *copy_net_ns(unsigned long flags, struct net *old_net)
99{
100 struct net *new_net = NULL;
101 int err;
102
103 get_net(old_net);
104
105 if (!(flags & CLONE_NEWNET))
106 return old_net;
107
108 err = -ENOMEM;
109 new_net = net_alloc();
110 if (!new_net)
111 goto out;
112
113 mutex_lock(&net_mutex);
114 err = setup_net(new_net);
115 if (err)
116 goto out_unlock;
117
118 rtnl_lock();
119 list_add_tail(&new_net->list, &net_namespace_list);
120 rtnl_unlock();
121
122
123out_unlock:
124 mutex_unlock(&net_mutex);
125out:
126 put_net(old_net);
127 if (err) {
128 net_free(new_net);
129 new_net = ERR_PTR(err);
130 }
131 return new_net;
132}
133
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200134static void cleanup_net(struct work_struct *work)
135{
136 struct pernet_operations *ops;
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200137 struct net *net;
138
139 net = container_of(work, struct net, work);
140
141 mutex_lock(&net_mutex);
142
143 /* Don't let anyone else find us. */
Eric W. Biedermanf4618d32007-09-26 22:40:08 -0700144 rtnl_lock();
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200145 list_del(&net->list);
Eric W. Biedermanf4618d32007-09-26 22:40:08 -0700146 rtnl_unlock();
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200147
148 /* Run all of the network namespace exit methods */
Pavel Emelyanov768f35912007-09-18 13:20:41 -0700149 list_for_each_entry_reverse(ops, &pernet_list, list) {
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200150 if (ops->exit)
151 ops->exit(net);
152 }
153
154 mutex_unlock(&net_mutex);
155
156 /* Ensure there are no outstanding rcu callbacks using this
157 * network namespace.
158 */
159 rcu_barrier();
160
161 /* Finally it is safe to free my network namespace structure */
162 net_free(net);
163}
164
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200165void __put_net(struct net *net)
166{
167 /* Cleanup the network namespace in process context */
168 INIT_WORK(&net->work, cleanup_net);
Benjamin Thery3ef13552007-11-19 23:18:16 -0800169 queue_work(netns_wq, &net->work);
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200170}
171EXPORT_SYMBOL_GPL(__put_net);
172
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -0700173#else
Eric W. Biederman9dd776b2007-09-26 22:04:26 -0700174struct net *copy_net_ns(unsigned long flags, struct net *old_net)
175{
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -0700176 if (flags & CLONE_NEWNET)
177 return ERR_PTR(-EINVAL);
178 return old_net;
Eric W. Biederman9dd776b2007-09-26 22:04:26 -0700179}
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -0700180#endif
Eric W. Biederman9dd776b2007-09-26 22:04:26 -0700181
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200182static int __init net_ns_init(void)
183{
184 int err;
185
186 printk(KERN_INFO "net_namespace: %zd bytes\n", sizeof(struct net));
Pavel Emelyanovd57a9212007-11-01 00:46:50 -0700187#ifdef CONFIG_NET_NS
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200188 net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
189 SMP_CACHE_BYTES,
190 SLAB_PANIC, NULL);
Benjamin Thery3ef13552007-11-19 23:18:16 -0800191
192 /* Create workqueue for cleanup */
193 netns_wq = create_singlethread_workqueue("netns");
194 if (!netns_wq)
195 panic("Could not create netns workq");
Pavel Emelyanovd57a9212007-11-01 00:46:50 -0700196#endif
Benjamin Thery3ef13552007-11-19 23:18:16 -0800197
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200198 mutex_lock(&net_mutex);
199 err = setup_net(&init_net);
200
Eric W. Biedermanf4618d32007-09-26 22:40:08 -0700201 rtnl_lock();
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200202 list_add_tail(&init_net.list, &net_namespace_list);
Eric W. Biedermanf4618d32007-09-26 22:40:08 -0700203 rtnl_unlock();
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200204
205 mutex_unlock(&net_mutex);
206 if (err)
207 panic("Could not setup the initial network namespace");
208
209 return 0;
210}
211
212pure_initcall(net_ns_init);
213
Denis V. Luneved160e82007-11-13 03:23:21 -0800214#ifdef CONFIG_NET_NS
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200215static int register_pernet_operations(struct list_head *list,
216 struct pernet_operations *ops)
217{
218 struct net *net, *undo_net;
219 int error;
220
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200221 list_add_tail(&ops->list, list);
Pavel Emelyanov1dba3232007-11-01 00:42:43 -0700222 if (ops->init) {
223 for_each_net(net) {
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200224 error = ops->init(net);
225 if (error)
226 goto out_undo;
227 }
228 }
Pavel Emelyanov1dba3232007-11-01 00:42:43 -0700229 return 0;
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200230
231out_undo:
232 /* If I have an error cleanup all namespaces I initialized */
233 list_del(&ops->list);
Pavel Emelyanov1dba3232007-11-01 00:42:43 -0700234 if (ops->exit) {
235 for_each_net(undo_net) {
236 if (undo_net == net)
237 goto undone;
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200238 ops->exit(undo_net);
Pavel Emelyanov1dba3232007-11-01 00:42:43 -0700239 }
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200240 }
241undone:
Pavel Emelyanov1dba3232007-11-01 00:42:43 -0700242 return error;
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200243}
244
245static void unregister_pernet_operations(struct pernet_operations *ops)
246{
247 struct net *net;
248
249 list_del(&ops->list);
Pavel Emelyanov1dba3232007-11-01 00:42:43 -0700250 if (ops->exit)
251 for_each_net(net)
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200252 ops->exit(net);
253}
254
Denis V. Luneved160e82007-11-13 03:23:21 -0800255#else
256
257static int register_pernet_operations(struct list_head *list,
258 struct pernet_operations *ops)
259{
260 if (ops->init == NULL)
261 return 0;
262 return ops->init(&init_net);
263}
264
265static void unregister_pernet_operations(struct pernet_operations *ops)
266{
267 if (ops->exit)
268 ops->exit(&init_net);
269}
270#endif
271
/* Allocator for the ids handed out by register_pernet_gen_device(). */
Pavel Emelyanovc93cf612008-04-15 00:35:23 -0700272static DEFINE_IDA(net_generic_ids);
273
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200274/**
275 * register_pernet_subsys - register a network namespace subsystem
276 * @ops: pernet operations structure for the subsystem
277 *
278 * Register a subsystem which has init and exit functions
279 * that are called when network namespaces are created and
280 * destroyed respectively.
281 *
282 * When registered all network namespace init functions are
283 * called for every existing network namespace. Allowing kernel
284 * modules to have a race free view of the set of network namespaces.
285 *
286 * When a new network namespace is created all of the init
287 * methods are called in the order in which they were registered.
288 *
289 * When a network namespace is destroyed all of the exit methods
290 * are called in the reverse of the order with which they were
291 * registered.
292 */
293int register_pernet_subsys(struct pernet_operations *ops)
294{
295 int error;
296 mutex_lock(&net_mutex);
297 error = register_pernet_operations(first_device, ops);
298 mutex_unlock(&net_mutex);
299 return error;
300}
301EXPORT_SYMBOL_GPL(register_pernet_subsys);
302
303/**
304 * unregister_pernet_subsys - unregister a network namespace subsystem
305 * @ops: pernet operations structure to manipulate
306 *
307 * Remove the pernet operations structure from the list to be
Oliver Pinter53379e52008-02-03 17:56:48 +0200308 * used when network namespaces are created or destroyed. In
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200309 * addition run the exit method for all existing network
310 * namespaces.
311 */
312void unregister_pernet_subsys(struct pernet_operations *module)
313{
314 mutex_lock(&net_mutex);
315 unregister_pernet_operations(module);
316 mutex_unlock(&net_mutex);
317}
318EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
319
320/**
321 * register_pernet_device - register a network namespace device
322 * @ops: pernet operations structure for the subsystem
323 *
324 * Register a device which has init and exit functions
325 * that are called when network namespaces are created and
326 * destroyed respectively.
327 *
328 * When registered all network namespace init functions are
329 * called for every existing network namespace. Allowing kernel
330 * modules to have a race free view of the set of network namespaces.
331 *
332 * When a new network namespace is created all of the init
333 * methods are called in the order in which they were registered.
334 *
335 * When a network namespace is destroyed all of the exit methods
336 * are called in the reverse of the order with which they were
337 * registered.
338 */
339int register_pernet_device(struct pernet_operations *ops)
340{
341 int error;
342 mutex_lock(&net_mutex);
343 error = register_pernet_operations(&pernet_list, ops);
344 if (!error && (first_device == &pernet_list))
345 first_device = &ops->list;
346 mutex_unlock(&net_mutex);
347 return error;
348}
349EXPORT_SYMBOL_GPL(register_pernet_device);
350
Pavel Emelyanovc93cf612008-04-15 00:35:23 -0700351int register_pernet_gen_device(int *id, struct pernet_operations *ops)
352{
353 int error;
354 mutex_lock(&net_mutex);
355again:
356 error = ida_get_new_above(&net_generic_ids, 1, id);
357 if (error) {
358 if (error == -EAGAIN) {
359 ida_pre_get(&net_generic_ids, GFP_KERNEL);
360 goto again;
361 }
362 goto out;
363 }
364 error = register_pernet_operations(&pernet_list, ops);
365 if (error)
366 ida_remove(&net_generic_ids, *id);
367 else if (first_device == &pernet_list)
368 first_device = &ops->list;
369out:
370 mutex_unlock(&net_mutex);
371 return error;
372}
373EXPORT_SYMBOL_GPL(register_pernet_gen_device);
374
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200375/**
376 * unregister_pernet_device - unregister a network namespace netdevice
377 * @ops: pernet operations structure to manipulate
378 *
379 * Remove the pernet operations structure from the list to be
Oliver Pinter53379e52008-02-03 17:56:48 +0200380 * used when network namespaces are created or destroyed. In
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200381 * addition run the exit method for all existing network
382 * namespaces.
383 */
384void unregister_pernet_device(struct pernet_operations *ops)
385{
386 mutex_lock(&net_mutex);
387 if (&ops->list == first_device)
388 first_device = first_device->next;
389 unregister_pernet_operations(ops);
390 mutex_unlock(&net_mutex);
391}
392EXPORT_SYMBOL_GPL(unregister_pernet_device);
Pavel Emelyanovc93cf612008-04-15 00:35:23 -0700393
394void unregister_pernet_gen_device(int id, struct pernet_operations *ops)
395{
396 mutex_lock(&net_mutex);
397 if (&ops->list == first_device)
398 first_device = first_device->next;
399 unregister_pernet_operations(ops);
400 ida_remove(&net_generic_ids, id);
401 mutex_unlock(&net_mutex);
402}
403EXPORT_SYMBOL_GPL(unregister_pernet_gen_device);
Pavel Emelyanovdec827d2008-04-15 00:36:08 -0700404
405static void net_generic_release(struct rcu_head *rcu)
406{
407 struct net_generic *ng;
408
409 ng = container_of(rcu, struct net_generic, rcu);
410 kfree(ng);
411}
412
413int net_assign_generic(struct net *net, int id, void *data)
414{
415 struct net_generic *ng, *old_ng;
416
417 BUG_ON(!mutex_is_locked(&net_mutex));
418 BUG_ON(id == 0);
419
420 ng = old_ng = net->gen;
421 if (old_ng->len >= id)
422 goto assign;
423
424 ng = kzalloc(sizeof(struct net_generic) +
425 id * sizeof(void *), GFP_KERNEL);
426 if (ng == NULL)
427 return -ENOMEM;
428
429 /*
430 * Some synchronisation notes:
431 *
432 * The net_generic explores the net->gen array inside rcu
433 * read section. Besides once set the net->gen->ptr[x]
434 * pointer never changes (see rules in netns/generic.h).
435 *
436 * That said, we simply duplicate this array and schedule
437 * the old copy for kfree after a grace period.
438 */
439
440 ng->len = id;
441 INIT_RCU_HEAD(&ng->rcu);
442 memcpy(&ng->ptr, &old_ng->ptr, old_ng->len);
443
444 rcu_assign_pointer(net->gen, ng);
445 call_rcu(&old_ng->rcu, net_generic_release);
446assign:
447 ng->ptr[id - 1] = data;
448 return 0;
449}
450EXPORT_SYMBOL_GPL(net_assign_generic);