blob: 72b4c184dd84a4d583a20ef0ed5dbd06618f5757 [file] [log] [blame]
Eric W. Biederman5f256be2007-09-12 11:50:50 +02001#include <linux/workqueue.h>
2#include <linux/rtnetlink.h>
3#include <linux/cache.h>
4#include <linux/slab.h>
5#include <linux/list.h>
6#include <linux/delay.h>
Eric W. Biederman9dd776b2007-09-26 22:04:26 -07007#include <linux/sched.h>
Pavel Emelyanovc93cf612008-04-15 00:35:23 -07008#include <linux/idr.h>
Eric W. Biederman5f256be2007-09-12 11:50:50 +02009#include <net/net_namespace.h>
Pavel Emelyanovdec827d2008-04-15 00:36:08 -070010#include <net/netns/generic.h>
Eric W. Biederman5f256be2007-09-12 11:50:50 +020011
12/*
13 * Our network namespace constructor/destructor lists
14 */
15
16static LIST_HEAD(pernet_list);
17static struct list_head *first_device = &pernet_list;
18static DEFINE_MUTEX(net_mutex);
19
Eric W. Biederman5f256be2007-09-12 11:50:50 +020020LIST_HEAD(net_namespace_list);
21
Eric W. Biederman5f256be2007-09-12 11:50:50 +020022struct net init_net;
Denis V. Lunevff4b9502008-01-22 22:05:33 -080023EXPORT_SYMBOL(init_net);
Eric W. Biederman5f256be2007-09-12 11:50:50 +020024
Pavel Emelyanovdec827d2008-04-15 00:36:08 -070025#define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */
26
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -070027/*
28 * setup_net runs the initializers for the network namespace object.
29 */
Pavel Emelyanov1a2ee932007-11-01 00:45:59 -070030static __net_init int setup_net(struct net *net)
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -070031{
32 /* Must be called with net_mutex held */
33 struct pernet_operations *ops;
34 int error;
Pavel Emelyanovdec827d2008-04-15 00:36:08 -070035 struct net_generic *ng;
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -070036
37 atomic_set(&net->count, 1);
Denis V. Lunev5d1e4462008-04-16 01:58:04 -070038#ifdef NETNS_REFCNT_DEBUG
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -070039 atomic_set(&net->use_count, 0);
Denis V. Lunev5d1e4462008-04-16 01:58:04 -070040#endif
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -070041
Pavel Emelyanovdec827d2008-04-15 00:36:08 -070042 error = -ENOMEM;
43 ng = kzalloc(sizeof(struct net_generic) +
44 INITIAL_NET_GEN_PTRS * sizeof(void *), GFP_KERNEL);
45 if (ng == NULL)
46 goto out;
47
48 ng->len = INITIAL_NET_GEN_PTRS;
49 INIT_RCU_HEAD(&ng->rcu);
50 rcu_assign_pointer(net->gen, ng);
51
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -070052 error = 0;
53 list_for_each_entry(ops, &pernet_list, list) {
54 if (ops->init) {
55 error = ops->init(net);
56 if (error < 0)
57 goto out_undo;
58 }
59 }
60out:
61 return error;
62
63out_undo:
64 /* Walk through the list backwards calling the exit functions
65 * for the pernet modules whose init functions did not fail.
66 */
67 list_for_each_entry_continue_reverse(ops, &pernet_list, list) {
68 if (ops->exit)
69 ops->exit(net);
70 }
71
72 rcu_barrier();
Pavel Emelyanovdec827d2008-04-15 00:36:08 -070073 kfree(ng);
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -070074 goto out;
75}
76
77#ifdef CONFIG_NET_NS
Pavel Emelyanovd57a9212007-11-01 00:46:50 -070078static struct kmem_cache *net_cachep;
Benjamin Thery3ef13552007-11-19 23:18:16 -080079static struct workqueue_struct *netns_wq;
Pavel Emelyanovd57a9212007-11-01 00:46:50 -070080
Eric W. Biederman5f256be2007-09-12 11:50:50 +020081static struct net *net_alloc(void)
82{
Pavel Emelyanov32f0c4c2007-10-09 13:02:17 -070083 return kmem_cache_zalloc(net_cachep, GFP_KERNEL);
Eric W. Biederman5f256be2007-09-12 11:50:50 +020084}
Eric W. Biederman5f256be2007-09-12 11:50:50 +020085
Johann Felix Soden45a19b02007-11-07 01:30:30 -080086static void net_free(struct net *net)
87{
88 if (!net)
89 return;
90
Denis V. Lunev5d1e4462008-04-16 01:58:04 -070091#ifdef NETNS_REFCNT_DEBUG
Johann Felix Soden45a19b02007-11-07 01:30:30 -080092 if (unlikely(atomic_read(&net->use_count) != 0)) {
93 printk(KERN_EMERG "network namespace not free! Usage: %d\n",
94 atomic_read(&net->use_count));
95 return;
96 }
Denis V. Lunev5d1e4462008-04-16 01:58:04 -070097#endif
Johann Felix Soden45a19b02007-11-07 01:30:30 -080098
99 kmem_cache_free(net_cachep, net);
100}
101
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -0700102struct net *copy_net_ns(unsigned long flags, struct net *old_net)
103{
104 struct net *new_net = NULL;
105 int err;
106
107 get_net(old_net);
108
109 if (!(flags & CLONE_NEWNET))
110 return old_net;
111
112 err = -ENOMEM;
113 new_net = net_alloc();
114 if (!new_net)
115 goto out;
116
117 mutex_lock(&net_mutex);
118 err = setup_net(new_net);
119 if (err)
120 goto out_unlock;
121
122 rtnl_lock();
123 list_add_tail(&new_net->list, &net_namespace_list);
124 rtnl_unlock();
125
126
127out_unlock:
128 mutex_unlock(&net_mutex);
129out:
130 put_net(old_net);
131 if (err) {
132 net_free(new_net);
133 new_net = ERR_PTR(err);
134 }
135 return new_net;
136}
137
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200138static void cleanup_net(struct work_struct *work)
139{
140 struct pernet_operations *ops;
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200141 struct net *net;
142
143 net = container_of(work, struct net, work);
144
145 mutex_lock(&net_mutex);
146
147 /* Don't let anyone else find us. */
Eric W. Biedermanf4618d32007-09-26 22:40:08 -0700148 rtnl_lock();
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200149 list_del(&net->list);
Eric W. Biedermanf4618d32007-09-26 22:40:08 -0700150 rtnl_unlock();
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200151
152 /* Run all of the network namespace exit methods */
Pavel Emelyanov768f35912007-09-18 13:20:41 -0700153 list_for_each_entry_reverse(ops, &pernet_list, list) {
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200154 if (ops->exit)
155 ops->exit(net);
156 }
157
158 mutex_unlock(&net_mutex);
159
160 /* Ensure there are no outstanding rcu callbacks using this
161 * network namespace.
162 */
163 rcu_barrier();
164
165 /* Finally it is safe to free my network namespace structure */
166 net_free(net);
167}
168
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200169void __put_net(struct net *net)
170{
171 /* Cleanup the network namespace in process context */
172 INIT_WORK(&net->work, cleanup_net);
Benjamin Thery3ef13552007-11-19 23:18:16 -0800173 queue_work(netns_wq, &net->work);
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200174}
175EXPORT_SYMBOL_GPL(__put_net);
176
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -0700177#else
Eric W. Biederman9dd776b2007-09-26 22:04:26 -0700178struct net *copy_net_ns(unsigned long flags, struct net *old_net)
179{
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -0700180 if (flags & CLONE_NEWNET)
181 return ERR_PTR(-EINVAL);
182 return old_net;
Eric W. Biederman9dd776b2007-09-26 22:04:26 -0700183}
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -0700184#endif
Eric W. Biederman9dd776b2007-09-26 22:04:26 -0700185
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200186static int __init net_ns_init(void)
187{
188 int err;
189
190 printk(KERN_INFO "net_namespace: %zd bytes\n", sizeof(struct net));
Pavel Emelyanovd57a9212007-11-01 00:46:50 -0700191#ifdef CONFIG_NET_NS
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200192 net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
193 SMP_CACHE_BYTES,
194 SLAB_PANIC, NULL);
Benjamin Thery3ef13552007-11-19 23:18:16 -0800195
196 /* Create workqueue for cleanup */
197 netns_wq = create_singlethread_workqueue("netns");
198 if (!netns_wq)
199 panic("Could not create netns workq");
Pavel Emelyanovd57a9212007-11-01 00:46:50 -0700200#endif
Benjamin Thery3ef13552007-11-19 23:18:16 -0800201
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200202 mutex_lock(&net_mutex);
203 err = setup_net(&init_net);
204
Eric W. Biedermanf4618d32007-09-26 22:40:08 -0700205 rtnl_lock();
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200206 list_add_tail(&init_net.list, &net_namespace_list);
Eric W. Biedermanf4618d32007-09-26 22:40:08 -0700207 rtnl_unlock();
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200208
209 mutex_unlock(&net_mutex);
210 if (err)
211 panic("Could not setup the initial network namespace");
212
213 return 0;
214}
215
216pure_initcall(net_ns_init);
217
Denis V. Luneved160e82007-11-13 03:23:21 -0800218#ifdef CONFIG_NET_NS
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200219static int register_pernet_operations(struct list_head *list,
220 struct pernet_operations *ops)
221{
222 struct net *net, *undo_net;
223 int error;
224
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200225 list_add_tail(&ops->list, list);
Pavel Emelyanov1dba3232007-11-01 00:42:43 -0700226 if (ops->init) {
227 for_each_net(net) {
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200228 error = ops->init(net);
229 if (error)
230 goto out_undo;
231 }
232 }
Pavel Emelyanov1dba3232007-11-01 00:42:43 -0700233 return 0;
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200234
235out_undo:
236 /* If I have an error cleanup all namespaces I initialized */
237 list_del(&ops->list);
Pavel Emelyanov1dba3232007-11-01 00:42:43 -0700238 if (ops->exit) {
239 for_each_net(undo_net) {
240 if (undo_net == net)
241 goto undone;
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200242 ops->exit(undo_net);
Pavel Emelyanov1dba3232007-11-01 00:42:43 -0700243 }
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200244 }
245undone:
Pavel Emelyanov1dba3232007-11-01 00:42:43 -0700246 return error;
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200247}
248
249static void unregister_pernet_operations(struct pernet_operations *ops)
250{
251 struct net *net;
252
253 list_del(&ops->list);
Pavel Emelyanov1dba3232007-11-01 00:42:43 -0700254 if (ops->exit)
255 for_each_net(net)
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200256 ops->exit(net);
257}
258
Denis V. Luneved160e82007-11-13 03:23:21 -0800259#else
260
261static int register_pernet_operations(struct list_head *list,
262 struct pernet_operations *ops)
263{
264 if (ops->init == NULL)
265 return 0;
266 return ops->init(&init_net);
267}
268
269static void unregister_pernet_operations(struct pernet_operations *ops)
270{
271 if (ops->exit)
272 ops->exit(&init_net);
273}
274#endif
275
/* Allocator for the ids handed out by register_pernet_gen_device(). */
static DEFINE_IDA(net_generic_ids);
277
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200278/**
279 * register_pernet_subsys - register a network namespace subsystem
280 * @ops: pernet operations structure for the subsystem
281 *
282 * Register a subsystem which has init and exit functions
283 * that are called when network namespaces are created and
284 * destroyed respectively.
285 *
286 * When registered all network namespace init functions are
287 * called for every existing network namespace. Allowing kernel
288 * modules to have a race free view of the set of network namespaces.
289 *
290 * When a new network namespace is created all of the init
291 * methods are called in the order in which they were registered.
292 *
293 * When a network namespace is destroyed all of the exit methods
294 * are called in the reverse of the order with which they were
295 * registered.
296 */
297int register_pernet_subsys(struct pernet_operations *ops)
298{
299 int error;
300 mutex_lock(&net_mutex);
301 error = register_pernet_operations(first_device, ops);
302 mutex_unlock(&net_mutex);
303 return error;
304}
305EXPORT_SYMBOL_GPL(register_pernet_subsys);
306
307/**
308 * unregister_pernet_subsys - unregister a network namespace subsystem
309 * @ops: pernet operations structure to manipulate
310 *
311 * Remove the pernet operations structure from the list to be
Oliver Pinter53379e52008-02-03 17:56:48 +0200312 * used when network namespaces are created or destroyed. In
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200313 * addition run the exit method for all existing network
314 * namespaces.
315 */
316void unregister_pernet_subsys(struct pernet_operations *module)
317{
318 mutex_lock(&net_mutex);
319 unregister_pernet_operations(module);
320 mutex_unlock(&net_mutex);
321}
322EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
323
324/**
325 * register_pernet_device - register a network namespace device
326 * @ops: pernet operations structure for the subsystem
327 *
328 * Register a device which has init and exit functions
329 * that are called when network namespaces are created and
330 * destroyed respectively.
331 *
332 * When registered all network namespace init functions are
333 * called for every existing network namespace. Allowing kernel
334 * modules to have a race free view of the set of network namespaces.
335 *
336 * When a new network namespace is created all of the init
337 * methods are called in the order in which they were registered.
338 *
339 * When a network namespace is destroyed all of the exit methods
340 * are called in the reverse of the order with which they were
341 * registered.
342 */
343int register_pernet_device(struct pernet_operations *ops)
344{
345 int error;
346 mutex_lock(&net_mutex);
347 error = register_pernet_operations(&pernet_list, ops);
348 if (!error && (first_device == &pernet_list))
349 first_device = &ops->list;
350 mutex_unlock(&net_mutex);
351 return error;
352}
353EXPORT_SYMBOL_GPL(register_pernet_device);
354
Pavel Emelyanovc93cf612008-04-15 00:35:23 -0700355int register_pernet_gen_device(int *id, struct pernet_operations *ops)
356{
357 int error;
358 mutex_lock(&net_mutex);
359again:
360 error = ida_get_new_above(&net_generic_ids, 1, id);
361 if (error) {
362 if (error == -EAGAIN) {
363 ida_pre_get(&net_generic_ids, GFP_KERNEL);
364 goto again;
365 }
366 goto out;
367 }
368 error = register_pernet_operations(&pernet_list, ops);
369 if (error)
370 ida_remove(&net_generic_ids, *id);
371 else if (first_device == &pernet_list)
372 first_device = &ops->list;
373out:
374 mutex_unlock(&net_mutex);
375 return error;
376}
377EXPORT_SYMBOL_GPL(register_pernet_gen_device);
378
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200379/**
380 * unregister_pernet_device - unregister a network namespace netdevice
381 * @ops: pernet operations structure to manipulate
382 *
383 * Remove the pernet operations structure from the list to be
Oliver Pinter53379e52008-02-03 17:56:48 +0200384 * used when network namespaces are created or destroyed. In
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200385 * addition run the exit method for all existing network
386 * namespaces.
387 */
388void unregister_pernet_device(struct pernet_operations *ops)
389{
390 mutex_lock(&net_mutex);
391 if (&ops->list == first_device)
392 first_device = first_device->next;
393 unregister_pernet_operations(ops);
394 mutex_unlock(&net_mutex);
395}
396EXPORT_SYMBOL_GPL(unregister_pernet_device);
Pavel Emelyanovc93cf612008-04-15 00:35:23 -0700397
398void unregister_pernet_gen_device(int id, struct pernet_operations *ops)
399{
400 mutex_lock(&net_mutex);
401 if (&ops->list == first_device)
402 first_device = first_device->next;
403 unregister_pernet_operations(ops);
404 ida_remove(&net_generic_ids, id);
405 mutex_unlock(&net_mutex);
406}
407EXPORT_SYMBOL_GPL(unregister_pernet_gen_device);
Pavel Emelyanovdec827d2008-04-15 00:36:08 -0700408
409static void net_generic_release(struct rcu_head *rcu)
410{
411 struct net_generic *ng;
412
413 ng = container_of(rcu, struct net_generic, rcu);
414 kfree(ng);
415}
416
417int net_assign_generic(struct net *net, int id, void *data)
418{
419 struct net_generic *ng, *old_ng;
420
421 BUG_ON(!mutex_is_locked(&net_mutex));
422 BUG_ON(id == 0);
423
424 ng = old_ng = net->gen;
425 if (old_ng->len >= id)
426 goto assign;
427
428 ng = kzalloc(sizeof(struct net_generic) +
429 id * sizeof(void *), GFP_KERNEL);
430 if (ng == NULL)
431 return -ENOMEM;
432
433 /*
434 * Some synchronisation notes:
435 *
436 * The net_generic explores the net->gen array inside rcu
437 * read section. Besides once set the net->gen->ptr[x]
438 * pointer never changes (see rules in netns/generic.h).
439 *
440 * That said, we simply duplicate this array and schedule
441 * the old copy for kfree after a grace period.
442 */
443
444 ng->len = id;
445 INIT_RCU_HEAD(&ng->rcu);
446 memcpy(&ng->ptr, &old_ng->ptr, old_ng->len);
447
448 rcu_assign_pointer(net->gen, ng);
449 call_rcu(&old_ng->rcu, net_generic_release);
450assign:
451 ng->ptr[id - 1] = data;
452 return 0;
453}
454EXPORT_SYMBOL_GPL(net_assign_generic);