blob: 9d1a4cac83b604cf657f1492628dbc4966db8eca [file] [log] [blame]
Joe Perchese005d192012-05-16 19:58:40 +00001#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
2
Eric W. Biederman5f256be2007-09-12 11:50:50 +02003#include <linux/workqueue.h>
4#include <linux/rtnetlink.h>
5#include <linux/cache.h>
6#include <linux/slab.h>
7#include <linux/list.h>
8#include <linux/delay.h>
Eric W. Biederman9dd776b2007-09-26 22:04:26 -07009#include <linux/sched.h>
Pavel Emelyanovc93cf612008-04-15 00:35:23 -070010#include <linux/idr.h>
Johannes Berg11a28d32009-07-10 09:51:33 +000011#include <linux/rculist.h>
Johannes Berg30ffee82009-07-10 09:51:35 +000012#include <linux/nsproxy.h>
David Howells0bb80f22013-04-12 01:50:06 +010013#include <linux/fs.h>
14#include <linux/proc_ns.h>
Eric W. Biedermanf0630522011-05-04 17:51:50 -070015#include <linux/file.h>
Paul Gortmakerbc3b2d72011-07-15 11:47:34 -040016#include <linux/export.h>
Eric W. Biederman038e7332012-06-14 02:31:10 -070017#include <linux/user_namespace.h>
Nicolas Dichtel0c7aecd2015-01-15 15:11:15 +010018#include <linux/net_namespace.h>
19#include <linux/rtnetlink.h>
20#include <net/sock.h>
21#include <net/netlink.h>
Eric W. Biederman5f256be2007-09-12 11:50:50 +020022#include <net/net_namespace.h>
Pavel Emelyanovdec827d2008-04-15 00:36:08 -070023#include <net/netns/generic.h>
Eric W. Biederman5f256be2007-09-12 11:50:50 +020024
25/*
26 * Our network namespace constructor/destructor lists
27 */
28
29static LIST_HEAD(pernet_list);
30static struct list_head *first_device = &pernet_list;
Cong Wang200b9162014-05-12 15:11:20 -070031DEFINE_MUTEX(net_mutex);
Eric W. Biederman5f256be2007-09-12 11:50:50 +020032
Eric W. Biederman5f256be2007-09-12 11:50:50 +020033LIST_HEAD(net_namespace_list);
Alexey Dobriyanb76a4612008-10-08 11:35:06 +020034EXPORT_SYMBOL_GPL(net_namespace_list);
Eric W. Biederman5f256be2007-09-12 11:50:50 +020035
Rustad, Mark D734b6542012-07-18 09:06:07 +000036struct net init_net = {
37 .dev_base_head = LIST_HEAD_INIT(init_net.dev_base_head),
38};
Denis V. Lunevff4b9502008-01-22 22:05:33 -080039EXPORT_SYMBOL(init_net);
Eric W. Biederman5f256be2007-09-12 11:50:50 +020040
#define INITIAL_NET_GEN_PTRS	13 /* +1 for len +2 for rcu_head */

/* Number of pointer slots given to each newly allocated net_generic. */
static unsigned int max_gen_ptrs = INITIAL_NET_GEN_PTRS;
44
45static struct net_generic *net_alloc_generic(void)
46{
47 struct net_generic *ng;
48 size_t generic_size = offsetof(struct net_generic, ptr[max_gen_ptrs]);
49
50 ng = kzalloc(generic_size, GFP_KERNEL);
51 if (ng)
52 ng->len = max_gen_ptrs;
53
54 return ng;
55}
56
Jiri Pirko05fceb42010-04-23 01:40:47 +000057static int net_assign_generic(struct net *net, int id, void *data)
58{
59 struct net_generic *ng, *old_ng;
60
61 BUG_ON(!mutex_is_locked(&net_mutex));
62 BUG_ON(id == 0);
63
Eric Dumazet1c877332010-10-25 03:20:11 +000064 old_ng = rcu_dereference_protected(net->gen,
65 lockdep_is_held(&net_mutex));
66 ng = old_ng;
Jiri Pirko05fceb42010-04-23 01:40:47 +000067 if (old_ng->len >= id)
68 goto assign;
69
Eric Dumazet073862b2012-01-26 00:41:38 +000070 ng = net_alloc_generic();
Jiri Pirko05fceb42010-04-23 01:40:47 +000071 if (ng == NULL)
72 return -ENOMEM;
73
74 /*
75 * Some synchronisation notes:
76 *
77 * The net_generic explores the net->gen array inside rcu
78 * read section. Besides once set the net->gen->ptr[x]
79 * pointer never changes (see rules in netns/generic.h).
80 *
81 * That said, we simply duplicate this array and schedule
82 * the old copy for kfree after a grace period.
83 */
84
Jiri Pirko05fceb42010-04-23 01:40:47 +000085 memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void*));
86
87 rcu_assign_pointer(net->gen, ng);
Lai Jiangshan04d4dfe2011-03-18 12:06:32 +080088 kfree_rcu(old_ng, rcu);
Jiri Pirko05fceb42010-04-23 01:40:47 +000089assign:
90 ng->ptr[id - 1] = data;
91 return 0;
92}
93
Eric W. Biedermanf875bae2009-11-29 22:25:28 +000094static int ops_init(const struct pernet_operations *ops, struct net *net)
95{
Julian Anastasovb9229342012-04-16 04:43:15 +000096 int err = -ENOMEM;
97 void *data = NULL;
98
Eric W. Biedermanf875bae2009-11-29 22:25:28 +000099 if (ops->id && ops->size) {
Julian Anastasovb9229342012-04-16 04:43:15 +0000100 data = kzalloc(ops->size, GFP_KERNEL);
Eric W. Biedermanf875bae2009-11-29 22:25:28 +0000101 if (!data)
Julian Anastasovb9229342012-04-16 04:43:15 +0000102 goto out;
Eric W. Biedermanf875bae2009-11-29 22:25:28 +0000103
104 err = net_assign_generic(net, *ops->id, data);
Julian Anastasovb9229342012-04-16 04:43:15 +0000105 if (err)
106 goto cleanup;
Eric W. Biedermanf875bae2009-11-29 22:25:28 +0000107 }
Julian Anastasovb9229342012-04-16 04:43:15 +0000108 err = 0;
Eric W. Biedermanf875bae2009-11-29 22:25:28 +0000109 if (ops->init)
Julian Anastasovb9229342012-04-16 04:43:15 +0000110 err = ops->init(net);
111 if (!err)
112 return 0;
113
114cleanup:
115 kfree(data);
116
117out:
118 return err;
Eric W. Biedermanf875bae2009-11-29 22:25:28 +0000119}
120
121static void ops_free(const struct pernet_operations *ops, struct net *net)
122{
123 if (ops->id && ops->size) {
124 int id = *ops->id;
125 kfree(net_generic(net, id));
126 }
127}
128
Eric W. Biederman72ad9372009-12-03 02:29:03 +0000129static void ops_exit_list(const struct pernet_operations *ops,
130 struct list_head *net_exit_list)
131{
132 struct net *net;
133 if (ops->exit) {
134 list_for_each_entry(net, net_exit_list, exit_list)
135 ops->exit(net);
136 }
Eric W. Biederman72ad9372009-12-03 02:29:03 +0000137 if (ops->exit_batch)
138 ops->exit_batch(net_exit_list);
139}
140
141static void ops_free_list(const struct pernet_operations *ops,
142 struct list_head *net_exit_list)
143{
144 struct net *net;
145 if (ops->size && ops->id) {
146 list_for_each_entry(net, net_exit_list, exit_list)
147 ops_free(ops, net);
148 }
149}
150
Nicolas Dichtel0c7aecd2015-01-15 15:11:15 +0100151static int alloc_netid(struct net *net, struct net *peer, int reqid)
152{
153 int min = 0, max = 0;
154
155 ASSERT_RTNL();
156
157 if (reqid >= 0) {
158 min = reqid;
159 max = reqid + 1;
160 }
161
162 return idr_alloc(&net->netns_ids, peer, min, max, GFP_KERNEL);
163}
164
/* Callback for idr_for_each(): stop the walk when the entry matches @peer.
 * A non-zero return value is what stops idr_for_each(), so a match is
 * reported by returning its id.  Id 0 cannot be reported that way (the
 * walk would simply continue), so it is encoded as the magic value
 * NET_ID_ZERO (-1) instead.
 */
#define NET_ID_ZERO -1
static int net_eq_idr(int id, void *net, void *peer)
{
	if (!net_eq(net, peer))
		return 0;

	return id ? id : NET_ID_ZERO;
}
177
178static int __peernet2id(struct net *net, struct net *peer, bool alloc)
179{
180 int id = idr_for_each(&net->netns_ids, net_eq_idr, peer);
181
182 ASSERT_RTNL();
183
184 /* Magic value for id 0. */
185 if (id == NET_ID_ZERO)
186 return 0;
187 if (id > 0)
188 return id;
189
190 if (alloc)
191 return alloc_netid(net, peer, -1);
192
193 return -ENOENT;
194}
195
196/* This function returns the id of a peer netns. If no id is assigned, one will
197 * be allocated and returned.
198 */
199int peernet2id(struct net *net, struct net *peer)
200{
201 int id = __peernet2id(net, peer, true);
202
203 return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED;
204}
205
206struct net *get_net_ns_by_id(struct net *net, int id)
207{
208 struct net *peer;
209
210 if (id < 0)
211 return NULL;
212
213 rcu_read_lock();
214 peer = idr_find(&net->netns_ids, id);
215 if (peer)
216 get_net(peer);
217 rcu_read_unlock();
218
219 return peer;
220}
221
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -0700222/*
223 * setup_net runs the initializers for the network namespace object.
224 */
Eric W. Biederman038e7332012-06-14 02:31:10 -0700225static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -0700226{
227 /* Must be called with net_mutex held */
Eric W. Biedermanf875bae2009-11-29 22:25:28 +0000228 const struct pernet_operations *ops, *saved_ops;
Daniel Lezcano486a87f2009-02-22 00:07:53 -0800229 int error = 0;
Eric W. Biederman72ad9372009-12-03 02:29:03 +0000230 LIST_HEAD(net_exit_list);
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -0700231
232 atomic_set(&net->count, 1);
Al Viroa685e082011-06-08 21:13:01 -0400233 atomic_set(&net->passive, 1);
Thomas Graf4e985ad2011-06-21 03:11:20 +0000234 net->dev_base_seq = 1;
Eric W. Biederman038e7332012-06-14 02:31:10 -0700235 net->user_ns = user_ns;
Nicolas Dichtel0c7aecd2015-01-15 15:11:15 +0100236 idr_init(&net->netns_ids);
Daniel Lezcano486a87f2009-02-22 00:07:53 -0800237
Denis V. Lunev5d1e4462008-04-16 01:58:04 -0700238#ifdef NETNS_REFCNT_DEBUG
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -0700239 atomic_set(&net->use_count, 0);
Denis V. Lunev5d1e4462008-04-16 01:58:04 -0700240#endif
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -0700241
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -0700242 list_for_each_entry(ops, &pernet_list, list) {
Eric W. Biedermanf875bae2009-11-29 22:25:28 +0000243 error = ops_init(ops, net);
244 if (error < 0)
245 goto out_undo;
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -0700246 }
247out:
248 return error;
249
250out_undo:
251 /* Walk through the list backwards calling the exit functions
252 * for the pernet modules whose init functions did not fail.
253 */
Eric W. Biederman72ad9372009-12-03 02:29:03 +0000254 list_add(&net->exit_list, &net_exit_list);
Eric W. Biedermanf875bae2009-11-29 22:25:28 +0000255 saved_ops = ops;
Eric W. Biederman72ad9372009-12-03 02:29:03 +0000256 list_for_each_entry_continue_reverse(ops, &pernet_list, list)
257 ops_exit_list(ops, &net_exit_list);
258
Eric W. Biedermanf875bae2009-11-29 22:25:28 +0000259 ops = saved_ops;
260 list_for_each_entry_continue_reverse(ops, &pernet_list, list)
Eric W. Biederman72ad9372009-12-03 02:29:03 +0000261 ops_free_list(ops, &net_exit_list);
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -0700262
263 rcu_barrier();
264 goto out;
265}
266
Daniel Lezcano486a87f2009-02-22 00:07:53 -0800267
Clemens Nossebe47d42009-02-23 15:37:35 -0800268#ifdef CONFIG_NET_NS
269static struct kmem_cache *net_cachep;
270static struct workqueue_struct *netns_wq;
271
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200272static struct net *net_alloc(void)
273{
Daniel Lezcano486a87f2009-02-22 00:07:53 -0800274 struct net *net = NULL;
275 struct net_generic *ng;
276
277 ng = net_alloc_generic();
278 if (!ng)
279 goto out;
280
281 net = kmem_cache_zalloc(net_cachep, GFP_KERNEL);
282 if (!net)
283 goto out_free;
284
285 rcu_assign_pointer(net->gen, ng);
286out:
287 return net;
288
289out_free:
290 kfree(ng);
291 goto out;
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200292}
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200293
Johann Felix Soden45a19b02007-11-07 01:30:30 -0800294static void net_free(struct net *net)
295{
Denis V. Lunev5d1e4462008-04-16 01:58:04 -0700296#ifdef NETNS_REFCNT_DEBUG
Johann Felix Soden45a19b02007-11-07 01:30:30 -0800297 if (unlikely(atomic_read(&net->use_count) != 0)) {
Joe Perchese005d192012-05-16 19:58:40 +0000298 pr_emerg("network namespace not free! Usage: %d\n",
299 atomic_read(&net->use_count));
Johann Felix Soden45a19b02007-11-07 01:30:30 -0800300 return;
301 }
Denis V. Lunev5d1e4462008-04-16 01:58:04 -0700302#endif
Eric Dumazet416c51e2014-09-09 08:24:53 -0700303 kfree(rcu_access_pointer(net->gen));
Johann Felix Soden45a19b02007-11-07 01:30:30 -0800304 kmem_cache_free(net_cachep, net);
305}
306
Al Viroa685e082011-06-08 21:13:01 -0400307void net_drop_ns(void *p)
308{
309 struct net *ns = p;
310 if (ns && atomic_dec_and_test(&ns->passive))
311 net_free(ns);
312}
313
Eric W. Biederman038e7332012-06-14 02:31:10 -0700314struct net *copy_net_ns(unsigned long flags,
315 struct user_namespace *user_ns, struct net *old_net)
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -0700316{
Alexey Dobriyan088eb2d2009-05-04 11:12:14 -0700317 struct net *net;
318 int rv;
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -0700319
Rob Landley911cb192011-04-15 02:26:25 +0000320 if (!(flags & CLONE_NEWNET))
321 return get_net(old_net);
322
Alexey Dobriyan088eb2d2009-05-04 11:12:14 -0700323 net = net_alloc();
324 if (!net)
325 return ERR_PTR(-ENOMEM);
Eric W. Biederman038e7332012-06-14 02:31:10 -0700326
327 get_user_ns(user_ns);
328
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -0700329 mutex_lock(&net_mutex);
Eric W. Biederman038e7332012-06-14 02:31:10 -0700330 rv = setup_net(net, user_ns);
Alexey Dobriyan088eb2d2009-05-04 11:12:14 -0700331 if (rv == 0) {
Daniel Lezcano486a87f2009-02-22 00:07:53 -0800332 rtnl_lock();
Johannes Berg11a28d32009-07-10 09:51:33 +0000333 list_add_tail_rcu(&net->list, &net_namespace_list);
Daniel Lezcano486a87f2009-02-22 00:07:53 -0800334 rtnl_unlock();
335 }
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -0700336 mutex_unlock(&net_mutex);
Alexey Dobriyan088eb2d2009-05-04 11:12:14 -0700337 if (rv < 0) {
Eric W. Biederman038e7332012-06-14 02:31:10 -0700338 put_user_ns(user_ns);
Al Viroa685e082011-06-08 21:13:01 -0400339 net_drop_ns(net);
Alexey Dobriyan088eb2d2009-05-04 11:12:14 -0700340 return ERR_PTR(rv);
341 }
342 return net;
343}
Daniel Lezcano486a87f2009-02-22 00:07:53 -0800344
Eric W. Biederman2b035b32009-11-29 22:25:27 +0000345static DEFINE_SPINLOCK(cleanup_list_lock);
346static LIST_HEAD(cleanup_list); /* Must hold cleanup_list_lock to touch */
347
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200348static void cleanup_net(struct work_struct *work)
349{
Eric W. Biedermanf875bae2009-11-29 22:25:28 +0000350 const struct pernet_operations *ops;
Eric W. Biederman2b035b32009-11-29 22:25:27 +0000351 struct net *net, *tmp;
xiao jin1818ce42014-04-25 08:50:54 +0800352 struct list_head net_kill_list;
Eric W. Biederman72ad9372009-12-03 02:29:03 +0000353 LIST_HEAD(net_exit_list);
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200354
Eric W. Biederman2b035b32009-11-29 22:25:27 +0000355 /* Atomically snapshot the list of namespaces to cleanup */
356 spin_lock_irq(&cleanup_list_lock);
357 list_replace_init(&cleanup_list, &net_kill_list);
358 spin_unlock_irq(&cleanup_list_lock);
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200359
360 mutex_lock(&net_mutex);
361
362 /* Don't let anyone else find us. */
Eric W. Biedermanf4618d32007-09-26 22:40:08 -0700363 rtnl_lock();
Eric W. Biederman72ad9372009-12-03 02:29:03 +0000364 list_for_each_entry(net, &net_kill_list, cleanup_list) {
Eric W. Biederman2b035b32009-11-29 22:25:27 +0000365 list_del_rcu(&net->list);
Eric W. Biederman72ad9372009-12-03 02:29:03 +0000366 list_add_tail(&net->exit_list, &net_exit_list);
Nicolas Dichtel0c7aecd2015-01-15 15:11:15 +0100367 for_each_net(tmp) {
368 int id = __peernet2id(tmp, net, false);
369
370 if (id >= 0)
371 idr_remove(&tmp->netns_ids, id);
372 }
373 idr_destroy(&net->netns_ids);
374
Eric W. Biederman72ad9372009-12-03 02:29:03 +0000375 }
Eric W. Biedermanf4618d32007-09-26 22:40:08 -0700376 rtnl_unlock();
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200377
Johannes Berg11a28d32009-07-10 09:51:33 +0000378 /*
379 * Another CPU might be rcu-iterating the list, wait for it.
380 * This needs to be before calling the exit() notifiers, so
381 * the rcu_barrier() below isn't sufficient alone.
382 */
383 synchronize_rcu();
384
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200385 /* Run all of the network namespace exit methods */
Eric W. Biederman72ad9372009-12-03 02:29:03 +0000386 list_for_each_entry_reverse(ops, &pernet_list, list)
387 ops_exit_list(ops, &net_exit_list);
388
Eric W. Biedermanf875bae2009-11-29 22:25:28 +0000389 /* Free the net generic variables */
Eric W. Biederman72ad9372009-12-03 02:29:03 +0000390 list_for_each_entry_reverse(ops, &pernet_list, list)
391 ops_free_list(ops, &net_exit_list);
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200392
393 mutex_unlock(&net_mutex);
394
395 /* Ensure there are no outstanding rcu callbacks using this
396 * network namespace.
397 */
398 rcu_barrier();
399
400 /* Finally it is safe to free my network namespace structure */
Eric W. Biederman72ad9372009-12-03 02:29:03 +0000401 list_for_each_entry_safe(net, tmp, &net_exit_list, exit_list) {
402 list_del_init(&net->exit_list);
Eric W. Biederman038e7332012-06-14 02:31:10 -0700403 put_user_ns(net->user_ns);
Al Viroa685e082011-06-08 21:13:01 -0400404 net_drop_ns(net);
Eric W. Biederman2b035b32009-11-29 22:25:27 +0000405 }
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200406}
Eric W. Biederman2b035b32009-11-29 22:25:27 +0000407static DECLARE_WORK(net_cleanup_work, cleanup_net);
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200408
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200409void __put_net(struct net *net)
410{
411 /* Cleanup the network namespace in process context */
Eric W. Biederman2b035b32009-11-29 22:25:27 +0000412 unsigned long flags;
413
414 spin_lock_irqsave(&cleanup_list_lock, flags);
415 list_add(&net->cleanup_list, &cleanup_list);
416 spin_unlock_irqrestore(&cleanup_list_lock, flags);
417
418 queue_work(netns_wq, &net_cleanup_work);
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200419}
420EXPORT_SYMBOL_GPL(__put_net);
421
Stephen Rothwell956c9202011-05-12 13:51:13 +1000422struct net *get_net_ns_by_fd(int fd)
423{
Stephen Rothwell956c9202011-05-12 13:51:13 +1000424 struct file *file;
Al Viro33c42942014-11-01 02:32:53 -0400425 struct ns_common *ns;
Stephen Rothwell956c9202011-05-12 13:51:13 +1000426 struct net *net;
427
Stephen Rothwell956c9202011-05-12 13:51:13 +1000428 file = proc_ns_fget(fd);
Al Viroc316e6a2011-06-05 00:37:35 +0000429 if (IS_ERR(file))
430 return ERR_CAST(file);
Stephen Rothwell956c9202011-05-12 13:51:13 +1000431
Al Virof77c8012014-11-01 03:13:17 -0400432 ns = get_proc_ns(file_inode(file));
Al Viro33c42942014-11-01 02:32:53 -0400433 if (ns->ops == &netns_operations)
434 net = get_net(container_of(ns, struct net, ns));
Al Viroc316e6a2011-06-05 00:37:35 +0000435 else
436 net = ERR_PTR(-EINVAL);
Stephen Rothwell956c9202011-05-12 13:51:13 +1000437
Al Viroc316e6a2011-06-05 00:37:35 +0000438 fput(file);
Stephen Rothwell956c9202011-05-12 13:51:13 +1000439 return net;
440}
441
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -0700442#else
Stephen Rothwell956c9202011-05-12 13:51:13 +1000443struct net *get_net_ns_by_fd(int fd)
444{
445 return ERR_PTR(-EINVAL);
446}
Pavel Emelyanov6a1a3b92007-11-01 00:44:50 -0700447#endif
Eric W. Biederman9dd776b2007-09-26 22:04:26 -0700448
Johannes Berg30ffee82009-07-10 09:51:35 +0000449struct net *get_net_ns_by_pid(pid_t pid)
450{
451 struct task_struct *tsk;
452 struct net *net;
453
454 /* Lookup the network namespace */
455 net = ERR_PTR(-ESRCH);
456 rcu_read_lock();
457 tsk = find_task_by_vpid(pid);
458 if (tsk) {
459 struct nsproxy *nsproxy;
Eric W. Biederman728dba32014-02-03 19:13:49 -0800460 task_lock(tsk);
461 nsproxy = tsk->nsproxy;
Johannes Berg30ffee82009-07-10 09:51:35 +0000462 if (nsproxy)
463 net = get_net(nsproxy->net_ns);
Eric W. Biederman728dba32014-02-03 19:13:49 -0800464 task_unlock(tsk);
Johannes Berg30ffee82009-07-10 09:51:35 +0000465 }
466 rcu_read_unlock();
467 return net;
468}
469EXPORT_SYMBOL_GPL(get_net_ns_by_pid);
470
Eric W. Biederman98f842e2011-06-15 10:21:48 -0700471static __net_init int net_ns_net_init(struct net *net)
472{
Al Viro33c42942014-11-01 02:32:53 -0400473#ifdef CONFIG_NET_NS
474 net->ns.ops = &netns_operations;
475#endif
Al Viro6344c432014-11-01 00:45:45 -0400476 return ns_alloc_inum(&net->ns);
Eric W. Biederman98f842e2011-06-15 10:21:48 -0700477}
478
479static __net_exit void net_ns_net_exit(struct net *net)
480{
Al Viro6344c432014-11-01 00:45:45 -0400481 ns_free_inum(&net->ns);
Eric W. Biederman98f842e2011-06-15 10:21:48 -0700482}
483
484static struct pernet_operations __net_initdata net_ns_ops = {
485 .init = net_ns_net_init,
486 .exit = net_ns_net_exit,
487};
488
Nicolas Dichtel0c7aecd2015-01-15 15:11:15 +0100489static struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = {
490 [NETNSA_NONE] = { .type = NLA_UNSPEC },
491 [NETNSA_NSID] = { .type = NLA_S32 },
492 [NETNSA_PID] = { .type = NLA_U32 },
493 [NETNSA_FD] = { .type = NLA_U32 },
494};
495
496static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh)
497{
498 struct net *net = sock_net(skb->sk);
499 struct nlattr *tb[NETNSA_MAX + 1];
500 struct net *peer;
501 int nsid, err;
502
503 err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
504 rtnl_net_policy);
505 if (err < 0)
506 return err;
507 if (!tb[NETNSA_NSID])
508 return -EINVAL;
509 nsid = nla_get_s32(tb[NETNSA_NSID]);
510
511 if (tb[NETNSA_PID])
512 peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
513 else if (tb[NETNSA_FD])
514 peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
515 else
516 return -EINVAL;
517 if (IS_ERR(peer))
518 return PTR_ERR(peer);
519
520 if (__peernet2id(net, peer, false) >= 0) {
521 err = -EEXIST;
522 goto out;
523 }
524
525 err = alloc_netid(net, peer, nsid);
526 if (err > 0)
527 err = 0;
528out:
529 put_net(peer);
530 return err;
531}
532
533static int rtnl_net_get_size(void)
534{
535 return NLMSG_ALIGN(sizeof(struct rtgenmsg))
536 + nla_total_size(sizeof(s32)) /* NETNSA_NSID */
537 ;
538}
539
540static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags,
541 int cmd, struct net *net, struct net *peer)
542{
543 struct nlmsghdr *nlh;
544 struct rtgenmsg *rth;
545 int id;
546
547 ASSERT_RTNL();
548
549 nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rth), flags);
550 if (!nlh)
551 return -EMSGSIZE;
552
553 rth = nlmsg_data(nlh);
554 rth->rtgen_family = AF_UNSPEC;
555
556 id = __peernet2id(net, peer, false);
557 if (id < 0)
558 id = NETNSA_NSID_NOT_ASSIGNED;
559 if (nla_put_s32(skb, NETNSA_NSID, id))
560 goto nla_put_failure;
561
562 nlmsg_end(skb, nlh);
563 return 0;
564
565nla_put_failure:
566 nlmsg_cancel(skb, nlh);
567 return -EMSGSIZE;
568}
569
570static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh)
571{
572 struct net *net = sock_net(skb->sk);
573 struct nlattr *tb[NETNSA_MAX + 1];
574 struct sk_buff *msg;
575 int err = -ENOBUFS;
576 struct net *peer;
577
578 err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX,
579 rtnl_net_policy);
580 if (err < 0)
581 return err;
582 if (tb[NETNSA_PID])
583 peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID]));
584 else if (tb[NETNSA_FD])
585 peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD]));
586 else
587 return -EINVAL;
588
589 if (IS_ERR(peer))
590 return PTR_ERR(peer);
591
592 msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL);
593 if (!msg) {
594 err = -ENOMEM;
595 goto out;
596 }
597
598 err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0,
599 RTM_GETNSID, net, peer);
600 if (err < 0)
601 goto err_out;
602
603 err = rtnl_unicast(msg, net, NETLINK_CB(skb).portid);
604 goto out;
605
606err_out:
607 nlmsg_free(msg);
608out:
609 put_net(peer);
610 return err;
611}
612
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200613static int __init net_ns_init(void)
614{
Daniel Lezcano486a87f2009-02-22 00:07:53 -0800615 struct net_generic *ng;
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200616
Pavel Emelyanovd57a9212007-11-01 00:46:50 -0700617#ifdef CONFIG_NET_NS
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200618 net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
619 SMP_CACHE_BYTES,
620 SLAB_PANIC, NULL);
Benjamin Thery3ef13552007-11-19 23:18:16 -0800621
622 /* Create workqueue for cleanup */
623 netns_wq = create_singlethread_workqueue("netns");
624 if (!netns_wq)
625 panic("Could not create netns workq");
Pavel Emelyanovd57a9212007-11-01 00:46:50 -0700626#endif
Benjamin Thery3ef13552007-11-19 23:18:16 -0800627
Daniel Lezcano486a87f2009-02-22 00:07:53 -0800628 ng = net_alloc_generic();
629 if (!ng)
630 panic("Could not allocate generic netns");
631
632 rcu_assign_pointer(init_net.gen, ng);
633
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200634 mutex_lock(&net_mutex);
Eric W. Biederman038e7332012-06-14 02:31:10 -0700635 if (setup_net(&init_net, &init_user_ns))
Stephen Hemmingerca0f3112009-05-21 15:10:31 -0700636 panic("Could not setup the initial network namespace");
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200637
Eric W. Biedermanf4618d32007-09-26 22:40:08 -0700638 rtnl_lock();
Johannes Berg11a28d32009-07-10 09:51:33 +0000639 list_add_tail_rcu(&init_net.list, &net_namespace_list);
Eric W. Biedermanf4618d32007-09-26 22:40:08 -0700640 rtnl_unlock();
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200641
642 mutex_unlock(&net_mutex);
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200643
Eric W. Biederman98f842e2011-06-15 10:21:48 -0700644 register_pernet_subsys(&net_ns_ops);
645
Nicolas Dichtel0c7aecd2015-01-15 15:11:15 +0100646 rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, NULL);
647 rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, NULL, NULL);
648
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200649 return 0;
650}
651
652pure_initcall(net_ns_init);
653
Denis V. Luneved160e82007-11-13 03:23:21 -0800654#ifdef CONFIG_NET_NS
Eric W. Biedermanf875bae2009-11-29 22:25:28 +0000655static int __register_pernet_operations(struct list_head *list,
656 struct pernet_operations *ops)
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200657{
Eric W. Biederman72ad9372009-12-03 02:29:03 +0000658 struct net *net;
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200659 int error;
Eric W. Biederman72ad9372009-12-03 02:29:03 +0000660 LIST_HEAD(net_exit_list);
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200661
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200662 list_add_tail(&ops->list, list);
Eric W. Biedermanf875bae2009-11-29 22:25:28 +0000663 if (ops->init || (ops->id && ops->size)) {
Pavel Emelyanov1dba3232007-11-01 00:42:43 -0700664 for_each_net(net) {
Eric W. Biedermanf875bae2009-11-29 22:25:28 +0000665 error = ops_init(ops, net);
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200666 if (error)
667 goto out_undo;
Eric W. Biederman72ad9372009-12-03 02:29:03 +0000668 list_add_tail(&net->exit_list, &net_exit_list);
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200669 }
670 }
Pavel Emelyanov1dba3232007-11-01 00:42:43 -0700671 return 0;
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200672
673out_undo:
674 /* If I have an error cleanup all namespaces I initialized */
675 list_del(&ops->list);
Eric W. Biederman72ad9372009-12-03 02:29:03 +0000676 ops_exit_list(ops, &net_exit_list);
677 ops_free_list(ops, &net_exit_list);
Pavel Emelyanov1dba3232007-11-01 00:42:43 -0700678 return error;
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200679}
680
Eric W. Biedermanf875bae2009-11-29 22:25:28 +0000681static void __unregister_pernet_operations(struct pernet_operations *ops)
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200682{
683 struct net *net;
Eric W. Biederman72ad9372009-12-03 02:29:03 +0000684 LIST_HEAD(net_exit_list);
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200685
686 list_del(&ops->list);
Eric W. Biederman72ad9372009-12-03 02:29:03 +0000687 for_each_net(net)
688 list_add_tail(&net->exit_list, &net_exit_list);
689 ops_exit_list(ops, &net_exit_list);
690 ops_free_list(ops, &net_exit_list);
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200691}
692
Denis V. Luneved160e82007-11-13 03:23:21 -0800693#else
694
Eric W. Biedermanf875bae2009-11-29 22:25:28 +0000695static int __register_pernet_operations(struct list_head *list,
696 struct pernet_operations *ops)
697{
Julian Anastasovb9229342012-04-16 04:43:15 +0000698 return ops_init(ops, &init_net);
Eric W. Biedermanf875bae2009-11-29 22:25:28 +0000699}
700
701static void __unregister_pernet_operations(struct pernet_operations *ops)
702{
Eric W. Biederman72ad9372009-12-03 02:29:03 +0000703 LIST_HEAD(net_exit_list);
704 list_add(&init_net.exit_list, &net_exit_list);
705 ops_exit_list(ops, &net_exit_list);
706 ops_free_list(ops, &net_exit_list);
Eric W. Biedermanf875bae2009-11-29 22:25:28 +0000707}
708
709#endif /* CONFIG_NET_NS */
710
711static DEFINE_IDA(net_generic_ids);
712
Denis V. Luneved160e82007-11-13 03:23:21 -0800713static int register_pernet_operations(struct list_head *list,
714 struct pernet_operations *ops)
715{
Eric W. Biedermanf875bae2009-11-29 22:25:28 +0000716 int error;
717
718 if (ops->id) {
719again:
720 error = ida_get_new_above(&net_generic_ids, 1, ops->id);
721 if (error < 0) {
722 if (error == -EAGAIN) {
723 ida_pre_get(&net_generic_ids, GFP_KERNEL);
724 goto again;
725 }
726 return error;
727 }
Eric Dumazet073862b2012-01-26 00:41:38 +0000728 max_gen_ptrs = max_t(unsigned int, max_gen_ptrs, *ops->id);
Eric W. Biedermanf875bae2009-11-29 22:25:28 +0000729 }
730 error = __register_pernet_operations(list, ops);
Eric W. Biederman3a765ed2009-12-03 02:29:06 +0000731 if (error) {
732 rcu_barrier();
733 if (ops->id)
734 ida_remove(&net_generic_ids, *ops->id);
735 }
Eric W. Biedermanf875bae2009-11-29 22:25:28 +0000736
737 return error;
Denis V. Luneved160e82007-11-13 03:23:21 -0800738}
739
740static void unregister_pernet_operations(struct pernet_operations *ops)
741{
Eric W. Biedermanf875bae2009-11-29 22:25:28 +0000742
743 __unregister_pernet_operations(ops);
Eric W. Biederman3a765ed2009-12-03 02:29:06 +0000744 rcu_barrier();
Eric W. Biedermanf875bae2009-11-29 22:25:28 +0000745 if (ops->id)
746 ida_remove(&net_generic_ids, *ops->id);
Denis V. Luneved160e82007-11-13 03:23:21 -0800747}
Pavel Emelyanovc93cf612008-04-15 00:35:23 -0700748
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200749/**
750 * register_pernet_subsys - register a network namespace subsystem
751 * @ops: pernet operations structure for the subsystem
752 *
753 * Register a subsystem which has init and exit functions
754 * that are called when network namespaces are created and
755 * destroyed respectively.
756 *
757 * When registered all network namespace init functions are
758 * called for every existing network namespace. Allowing kernel
759 * modules to have a race free view of the set of network namespaces.
760 *
761 * When a new network namespace is created all of the init
762 * methods are called in the order in which they were registered.
763 *
764 * When a network namespace is destroyed all of the exit methods
765 * are called in the reverse of the order with which they were
766 * registered.
767 */
768int register_pernet_subsys(struct pernet_operations *ops)
769{
770 int error;
771 mutex_lock(&net_mutex);
772 error = register_pernet_operations(first_device, ops);
773 mutex_unlock(&net_mutex);
774 return error;
775}
776EXPORT_SYMBOL_GPL(register_pernet_subsys);
777
778/**
779 * unregister_pernet_subsys - unregister a network namespace subsystem
780 * @ops: pernet operations structure to manipulate
781 *
782 * Remove the pernet operations structure from the list to be
Oliver Pinter53379e52008-02-03 17:56:48 +0200783 * used when network namespaces are created or destroyed. In
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200784 * addition run the exit method for all existing network
785 * namespaces.
786 */
Jiri Pirkob3c981d2010-04-25 00:49:56 -0700787void unregister_pernet_subsys(struct pernet_operations *ops)
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200788{
789 mutex_lock(&net_mutex);
Jiri Pirkob3c981d2010-04-25 00:49:56 -0700790 unregister_pernet_operations(ops);
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200791 mutex_unlock(&net_mutex);
792}
793EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
794
795/**
796 * register_pernet_device - register a network namespace device
797 * @ops: pernet operations structure for the subsystem
798 *
799 * Register a device which has init and exit functions
800 * that are called when network namespaces are created and
801 * destroyed respectively.
802 *
803 * When registered all network namespace init functions are
804 * called for every existing network namespace. Allowing kernel
805 * modules to have a race free view of the set of network namespaces.
806 *
807 * When a new network namespace is created all of the init
808 * methods are called in the order in which they were registered.
809 *
810 * When a network namespace is destroyed all of the exit methods
811 * are called in the reverse of the order with which they were
812 * registered.
813 */
814int register_pernet_device(struct pernet_operations *ops)
815{
816 int error;
817 mutex_lock(&net_mutex);
818 error = register_pernet_operations(&pernet_list, ops);
819 if (!error && (first_device == &pernet_list))
820 first_device = &ops->list;
821 mutex_unlock(&net_mutex);
822 return error;
823}
824EXPORT_SYMBOL_GPL(register_pernet_device);
825
826/**
827 * unregister_pernet_device - unregister a network namespace netdevice
828 * @ops: pernet operations structure to manipulate
829 *
830 * Remove the pernet operations structure from the list to be
Oliver Pinter53379e52008-02-03 17:56:48 +0200831 * used when network namespaces are created or destroyed. In
Eric W. Biederman5f256be2007-09-12 11:50:50 +0200832 * addition run the exit method for all existing network
833 * namespaces.
834 */
835void unregister_pernet_device(struct pernet_operations *ops)
836{
837 mutex_lock(&net_mutex);
838 if (&ops->list == first_device)
839 first_device = first_device->next;
840 unregister_pernet_operations(ops);
841 mutex_unlock(&net_mutex);
842}
843EXPORT_SYMBOL_GPL(unregister_pernet_device);
Eric W. Biederman13b6f572010-03-07 18:14:23 -0800844
845#ifdef CONFIG_NET_NS
Al Viro64964522014-11-01 00:37:32 -0400846static struct ns_common *netns_get(struct task_struct *task)
Eric W. Biederman13b6f572010-03-07 18:14:23 -0800847{
Eric W. Biedermanf0630522011-05-04 17:51:50 -0700848 struct net *net = NULL;
849 struct nsproxy *nsproxy;
850
Eric W. Biederman728dba32014-02-03 19:13:49 -0800851 task_lock(task);
852 nsproxy = task->nsproxy;
Eric W. Biedermanf0630522011-05-04 17:51:50 -0700853 if (nsproxy)
854 net = get_net(nsproxy->net_ns);
Eric W. Biederman728dba32014-02-03 19:13:49 -0800855 task_unlock(task);
Eric W. Biedermanf0630522011-05-04 17:51:50 -0700856
Al Viroff248702014-11-01 00:10:50 -0400857 return net ? &net->ns : NULL;
858}
859
860static inline struct net *to_net_ns(struct ns_common *ns)
861{
862 return container_of(ns, struct net, ns);
Eric W. Biederman13b6f572010-03-07 18:14:23 -0800863}
864
/* proc_ns ->put hook: drop the reference on the namespace behind @ns. */
static void netns_put(struct ns_common *ns)
{
	struct net *net = to_net_ns(ns);

	put_net(net);
}
869
Al Viro64964522014-11-01 00:37:32 -0400870static int netns_install(struct nsproxy *nsproxy, struct ns_common *ns)
Eric W. Biederman13b6f572010-03-07 18:14:23 -0800871{
Al Viroff248702014-11-01 00:10:50 -0400872 struct net *net = to_net_ns(ns);
Eric W. Biederman142e1d12012-07-26 01:13:20 -0700873
Eric W. Biederman5e4a0842012-12-14 07:55:36 -0800874 if (!ns_capable(net->user_ns, CAP_SYS_ADMIN) ||
Eric W. Biedermanc7b96ac2013-03-20 12:49:49 -0700875 !ns_capable(current_user_ns(), CAP_SYS_ADMIN))
Eric W. Biederman142e1d12012-07-26 01:13:20 -0700876 return -EPERM;
877
Eric W. Biederman13b6f572010-03-07 18:14:23 -0800878 put_net(nsproxy->net_ns);
Eric W. Biederman142e1d12012-07-26 01:13:20 -0700879 nsproxy->net_ns = get_net(net);
Eric W. Biederman13b6f572010-03-07 18:14:23 -0800880 return 0;
881}
882
883const struct proc_ns_operations netns_operations = {
884 .name = "net",
885 .type = CLONE_NEWNET,
886 .get = netns_get,
887 .put = netns_put,
888 .install = netns_install,
889};
890#endif