/* blob: 7c52fe277b62441966f758b25fad9acef18a10d0 [file] [log] [blame] */
#include <linux/workqueue.h>
#include <linux/rtnetlink.h>
#include <linux/cache.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/delay.h>
#include <linux/sched.h>
#include <linux/idr.h>
#include <net/net_namespace.h>
#include <net/netns/generic.h>
/*
* Our network namespace constructor/destructor lists
*
* pernet_list holds every registered pernet_operations.  setup_net() walks
* it forwards calling ->init, cleanup_net() walks it backwards calling ->exit.
*/
static LIST_HEAD(pernet_list);
/*
* Insertion point used by register_pernet_subsys(): new subsystem entries
* are added immediately in front of this position.  It is the list head
* until the first device registers, then that device's list entry.
*/
static struct list_head *first_device = &pernet_list;
/* Taken around pernet (un)registration and namespace setup/cleanup below. */
static DEFINE_MUTEX(net_mutex);
/* List of network namespaces; this file modifies it under rtnl_lock(). */
LIST_HEAD(net_namespace_list);
/* The initial network namespace; set up by net_ns_init(). */
struct net init_net;
EXPORT_SYMBOL(init_net);
/* Number of pointer slots in the net_generic array that setup_net() allocates. */
#define INITIAL_NET_GEN_PTRS 13 /* +1 for len +2 for rcu_head */
/*
 * setup_net - run the initializers for a network namespace object.
 * @net: namespace to initialize
 *
 * Sets the reference count to 1, allocates the initial net_generic pointer
 * array and publishes it in net->gen, then calls ->init of every entry on
 * pernet_list in list order.
 *
 * If an ->init fails, ->exit is called in reverse order for the entries
 * that were already initialized and the generic array is released.
 *
 * Must be called with net_mutex held.
 *
 * Returns 0 on success, -ENOMEM if the generic array cannot be allocated,
 * or the error returned by the failing ->init.
 */
static __net_init int setup_net(struct net *net)
{
	struct pernet_operations *ops;
	int error;
	struct net_generic *ng;

	atomic_set(&net->count, 1);
#ifdef NETNS_REFCNT_DEBUG
	atomic_set(&net->use_count, 0);
#endif

	error = -ENOMEM;
	ng = kzalloc(sizeof(struct net_generic) +
			INITIAL_NET_GEN_PTRS * sizeof(void *), GFP_KERNEL);
	if (ng == NULL)
		goto out;

	ng->len = INITIAL_NET_GEN_PTRS;
	INIT_RCU_HEAD(&ng->rcu);
	rcu_assign_pointer(net->gen, ng);

	error = 0;
	list_for_each_entry(ops, &pernet_list, list) {
		if (ops->init) {
			error = ops->init(net);
			if (error < 0)
				goto out_undo;
		}
	}
out:
	return error;

out_undo:
	/* Walk through the list backwards calling the exit functions
	 * for the pernet modules whose init functions did not fail.
	 */
	list_for_each_entry_continue_reverse(ops, &pernet_list, list) {
		if (ops->exit)
			ops->exit(net);
	}

	/*
	 * An ->init may have called net_assign_generic(), which replaces
	 * net->gen with a larger array and hands the previous one to
	 * call_rcu() for freeing.  Wait for those callbacks, then free the
	 * array that is current now.  Freeing the local 'ng' instead could
	 * free the original array a second time and leak its replacement.
	 */
	rcu_barrier();
	kfree(net->gen);
	net->gen = NULL;
	goto out;
}
#ifdef CONFIG_NET_NS
/* Slab cache for struct net; created in net_ns_init(). */
static struct kmem_cache *net_cachep;
/* Workqueue on which __put_net() queues cleanup_net(). */
static struct workqueue_struct *netns_wq;
/* Allocate a zero-filled struct net from the namespace slab cache. */
static struct net *net_alloc(void)
{
	struct net *net;

	net = kmem_cache_zalloc(net_cachep, GFP_KERNEL);
	return net;
}
/*
 * Give a struct net back to the slab cache.  A NULL pointer is ignored.
 *
 * With NETNS_REFCNT_DEBUG, a namespace whose use_count is still non-zero
 * is reported and intentionally left allocated.
 */
static void net_free(struct net *net)
{
	if (net != NULL) {
#ifdef NETNS_REFCNT_DEBUG
		if (unlikely(atomic_read(&net->use_count) != 0)) {
			printk(KERN_EMERG "network namespace not free! Usage: %d\n",
				atomic_read(&net->use_count));
			return;
		}
#endif
		kmem_cache_free(net_cachep, net);
	}
}
/*
 * copy_net_ns - return the network namespace to use for the given flags.
 * @flags:   clone flags; only CLONE_NEWNET is examined
 * @old_net: the current namespace
 *
 * Without CLONE_NEWNET a reference is taken on @old_net and it is returned.
 * With CLONE_NEWNET a new namespace is allocated, set up under net_mutex
 * and appended to net_namespace_list under rtnl_lock(); the temporary
 * reference on @old_net is dropped again before returning.
 *
 * Returns the namespace, or an ERR_PTR() value if allocation or setup fails.
 */
struct net *copy_net_ns(unsigned long flags, struct net *old_net)
{
	struct net *net;
	int rc;

	get_net(old_net);
	if (!(flags & CLONE_NEWNET))
		return old_net;

	net = net_alloc();
	if (!net) {
		rc = -ENOMEM;
	} else {
		mutex_lock(&net_mutex);
		rc = setup_net(net);
		if (!rc) {
			/* Publish the fully initialized namespace. */
			rtnl_lock();
			list_add_tail(&net->list, &net_namespace_list);
			rtnl_unlock();
		}
		mutex_unlock(&net_mutex);
	}

	put_net(old_net);
	if (!rc)
		return net;

	net_free(net);
	return ERR_PTR(rc);
}
/*
 * cleanup_net - work handler that tears down a network namespace.
 * @work: the work_struct embedded in the struct net being destroyed
 *
 * Unlinks the namespace from net_namespace_list, runs every registered
 * ->exit method in reverse registration order, waits for outstanding RCU
 * callbacks and finally frees the generic pointer array and the namespace.
 */
static void cleanup_net(struct work_struct *work)
{
	struct pernet_operations *ops;
	struct net *net;

	/* Be very certain incoming network packets will not find us */
	rcu_barrier();

	net = container_of(work, struct net, work);

	mutex_lock(&net_mutex);

	/* Don't let anyone else find us. */
	rtnl_lock();
	list_del(&net->list);
	rtnl_unlock();

	/* Run all of the network namespace exit methods */
	list_for_each_entry_reverse(ops, &pernet_list, list) {
		if (ops->exit)
			ops->exit(net);
	}

	mutex_unlock(&net_mutex);

	/* Ensure there are no outstanding rcu callbacks using this
	 * network namespace.
	 */
	rcu_barrier();

	/*
	 * The generic pointer array is allocated separately from the
	 * namespace (setup_net() / net_assign_generic()) and net_free()
	 * does not release it, so free it here to avoid leaking it for
	 * every destroyed namespace.  Clear the pointer so nothing stale
	 * is left behind should net_free() decline to free the namespace.
	 */
	kfree(net->gen);
	net->gen = NULL;

	/* Finally it is safe to free my network namespace structure */
	net_free(net);
}
void __put_net(struct net *net)
{
/* Cleanup the network namespace in process context */
INIT_WORK(&net->work, cleanup_net);
queue_work(netns_wq, &net->work);
}
EXPORT_SYMBOL_GPL(__put_net);
#else
/*
 * Variant used without CONFIG_NET_NS: a new namespace cannot be created,
 * so CLONE_NEWNET yields ERR_PTR(-EINVAL); otherwise @old_net is returned
 * unchanged.
 */
struct net *copy_net_ns(unsigned long flags, struct net *old_net)
{
	if (!(flags & CLONE_NEWNET))
		return old_net;

	return ERR_PTR(-EINVAL);
}
#endif
/*
 * Boot-time initialization: with CONFIG_NET_NS create the struct net slab
 * cache and the cleanup workqueue, then set up init_net and add it to
 * net_namespace_list.  Any failure here panics.
 */
static int __init net_ns_init(void)
{
	int rc;

	printk(KERN_INFO "net_namespace: %zd bytes\n", sizeof(struct net));

#ifdef CONFIG_NET_NS
	net_cachep = kmem_cache_create("net_namespace", sizeof(struct net),
				       SMP_CACHE_BYTES,
				       SLAB_PANIC, NULL);

	/* Namespace teardown is run from this workqueue. */
	netns_wq = create_singlethread_workqueue("netns");
	if (netns_wq == NULL)
		panic("Could not create netns workq");
#endif

	mutex_lock(&net_mutex);
	rc = setup_net(&init_net);

	rtnl_lock();
	list_add_tail(&init_net.list, &net_namespace_list);
	rtnl_unlock();

	mutex_unlock(&net_mutex);

	if (rc != 0)
		panic("Could not setup the initial network namespace");

	return 0;
}
pure_initcall(net_ns_init);
#ifdef CONFIG_NET_NS
/*
 * Roll back a partially completed registration: unlink @ops and call its
 * ->exit on each namespace that comes before @failed in the namespace
 * list, i.e. the ones whose ->init had already succeeded.
 */
static void pernet_ops_unwind(struct pernet_operations *ops,
			      struct net *failed)
{
	struct net *cur;

	list_del(&ops->list);
	if (!ops->exit)
		return;

	for_each_net(cur) {
		if (cur == failed)
			break;
		ops->exit(cur);
	}
}

/*
 * Insert @ops in front of @list and call its ->init on every existing
 * namespace.  On the first failing ->init the registration is undone and
 * that error is returned; otherwise 0 is returned.
 */
static int register_pernet_operations(struct list_head *list,
				      struct pernet_operations *ops)
{
	struct net *net;
	int rc;

	list_add_tail(&ops->list, list);
	if (!ops->init)
		return 0;

	for_each_net(net) {
		rc = ops->init(net);
		if (rc) {
			pernet_ops_unwind(ops, net);
			return rc;
		}
	}

	return 0;
}
static void unregister_pernet_operations(struct pernet_operations *ops)
{
struct net *net;
list_del(&ops->list);
if (ops->exit)
for_each_net(net)
ops->exit(net);
}
#else
/*
 * Without CONFIG_NET_NS only init_net exists: run ->init on it, if there
 * is one, and return its result.  @list is unused in this variant.
 */
static int register_pernet_operations(struct list_head *list,
				      struct pernet_operations *ops)
{
	if (ops->init != NULL)
		return ops->init(&init_net);

	return 0;
}
/* Without CONFIG_NET_NS only init_net exists: run ->exit on it, if any. */
static void unregister_pernet_operations(struct pernet_operations *ops)
{
	if (ops->exit == NULL)
		return;

	ops->exit(&init_net);
}
#endif
/* Allocator for the ids handed out by register_pernet_gen_device(). */
static DEFINE_IDA(net_generic_ids);
/**
 *	register_pernet_subsys - register a network namespace subsystem
 *	@ops: pernet operations structure for the subsystem
 *
 *	Registers a subsystem whose init and exit functions are invoked
 *	when network namespaces are created and destroyed respectively.
 *
 *	At registration time the init function is called for every
 *	network namespace that already exists, giving kernel modules a
 *	race free view of the set of network namespaces.
 *
 *	For a newly created network namespace the init methods run in
 *	the order in which they were registered; when a namespace is
 *	destroyed the exit methods run in the reverse of that order.
 *
 *	Subsystem entries are placed in front of all registered devices.
 *	Returns 0 or the error reported by a failing init function.
 */
int register_pernet_subsys(struct pernet_operations *ops)
{
	int rc;

	mutex_lock(&net_mutex);
	rc = register_pernet_operations(first_device, ops);
	mutex_unlock(&net_mutex);

	return rc;
}
EXPORT_SYMBOL_GPL(register_pernet_subsys);
/**
 *	unregister_pernet_subsys - unregister a network namespace subsystem
 *	@module: pernet operations structure to manipulate
 *
 *	Takes the pernet operations structure off the list consulted when
 *	network namespaces are created or destroyed and, in addition,
 *	runs its exit method for all existing network namespaces.
 */
void unregister_pernet_subsys(struct pernet_operations *module)
{
	mutex_lock(&net_mutex);

	unregister_pernet_operations(module);

	mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_subsys);
/**
 *	register_pernet_device - register a network namespace device
 *	@ops: pernet operations structure for the subsystem
 *
 *	Registers a device whose init and exit functions are invoked
 *	when network namespaces are created and destroyed respectively.
 *
 *	At registration time the init function is called for every
 *	network namespace that already exists, giving kernel modules a
 *	race free view of the set of network namespaces.
 *
 *	For a newly created network namespace the init methods run in
 *	the order in which they were registered; when a namespace is
 *	destroyed the exit methods run in the reverse of that order.
 *
 *	Returns 0 or the error reported by a failing init function.
 */
int register_pernet_device(struct pernet_operations *ops)
{
	int rc;

	mutex_lock(&net_mutex);

	rc = register_pernet_operations(&pernet_list, ops);
	if (rc == 0) {
		/* The first device marks where subsystem entries end. */
		if (first_device == &pernet_list)
			first_device = &ops->list;
	}

	mutex_unlock(&net_mutex);

	return rc;
}
EXPORT_SYMBOL_GPL(register_pernet_device);
/**
 *	register_pernet_gen_device - register a device that uses net_generic
 *	@id:  filled in with the newly allocated generic id (always >= 1)
 *	@ops: pernet operations structure for the device
 *
 *	Allocates an id from net_generic_ids and then registers @ops like
 *	register_pernet_device().  If registration fails the id is
 *	released again.
 *
 *	Returns 0 on success, -ENOMEM if memory for the id allocator
 *	cannot be obtained, or the error from id allocation or from a
 *	failing init function.
 */
int register_pernet_gen_device(int *id, struct pernet_operations *ops)
{
	int error;

	mutex_lock(&net_mutex);
again:
	error = ida_get_new_above(&net_generic_ids, 1, id);
	if (error == -EAGAIN) {
		/*
		 * The allocator needs more preallocated memory.  If that
		 * cannot be obtained, give up instead of retrying forever
		 * with net_mutex held.
		 */
		if (!ida_pre_get(&net_generic_ids, GFP_KERNEL)) {
			error = -ENOMEM;
			goto out;
		}
		goto again;
	}
	if (error)
		goto out;

	error = register_pernet_operations(&pernet_list, ops);
	if (error)
		ida_remove(&net_generic_ids, *id);
	else if (first_device == &pernet_list)
		first_device = &ops->list;
out:
	mutex_unlock(&net_mutex);
	return error;
}
EXPORT_SYMBOL_GPL(register_pernet_gen_device);
/**
* unregister_pernet_device - unregister a network namespace netdevice
* @ops: pernet operations structure to manipulate
*
* Remove the pernet operations structure from the list to be
* used when network namespaces are created or destroyed. In
* addition run the exit method for all existing network
* namespaces.
*/
void unregister_pernet_device(struct pernet_operations *ops)
{
mutex_lock(&net_mutex);
if (&ops->list == first_device)
first_device = first_device->next;
unregister_pernet_operations(ops);
mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_device);
void unregister_pernet_gen_device(int id, struct pernet_operations *ops)
{
mutex_lock(&net_mutex);
if (&ops->list == first_device)
first_device = first_device->next;
unregister_pernet_operations(ops);
ida_remove(&net_generic_ids, id);
mutex_unlock(&net_mutex);
}
EXPORT_SYMBOL_GPL(unregister_pernet_gen_device);
/* RCU callback: free the net_generic array that embeds @rcu. */
static void net_generic_release(struct rcu_head *rcu)
{
	kfree(container_of(rcu, struct net_generic, rcu));
}
/*
 * net_assign_generic - store a per-namespace pointer under a generic id.
 * @net:  namespace whose generic array is updated
 * @id:   generic id, must not be 0; the pointer lives in slot @id - 1
 * @data: pointer to store
 *
 * If the current array is too short, a zeroed array with @id slots is
 * allocated, the existing pointers are copied over, the new array is
 * published in net->gen and the old one is freed after an RCU grace period.
 *
 * Must be called with net_mutex held (enforced with BUG_ON).
 *
 * Returns 0 on success or -ENOMEM if the larger array cannot be allocated.
 */
int net_assign_generic(struct net *net, int id, void *data)
{
	struct net_generic *ng, *old_ng;

	BUG_ON(!mutex_is_locked(&net_mutex));
	BUG_ON(id == 0);

	ng = old_ng = net->gen;
	if (old_ng->len >= id)
		goto assign;

	ng = kzalloc(sizeof(struct net_generic) +
			id * sizeof(void *), GFP_KERNEL);
	if (ng == NULL)
		return -ENOMEM;

	/*
	 * Some synchronisation notes:
	 *
	 * The net_generic explores the net->gen array inside rcu
	 * read section. Besides once set the net->gen->ptr[x]
	 * pointer never changes (see rules in netns/generic.h).
	 *
	 * That said, we simply duplicate this array and schedule
	 * the old copy for kfree after a grace period.
	 */

	ng->len = id;
	INIT_RCU_HEAD(&ng->rcu);
	/*
	 * len counts pointer slots, not bytes: scale by the slot size so
	 * that every existing pointer is carried over to the new array.
	 */
	memcpy(&ng->ptr, &old_ng->ptr, old_ng->len * sizeof(void *));

	rcu_assign_pointer(net->gen, ng);
	call_rcu(&old_ng->rcu, net_generic_release);
assign:
	ng->ptr[id - 1] = data;
	return 0;
}
EXPORT_SYMBOL_GPL(net_assign_generic);