Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-2.6
diff --git a/drivers/net/e100.c b/drivers/net/e100.c
index 5c0b457..0f9ee13 100644
--- a/drivers/net/e100.c
+++ b/drivers/net/e100.c
@@ -2728,7 +2728,7 @@
#define E100_82552_SMARTSPEED 0x14 /* SmartSpeed Ctrl register */
#define E100_82552_REV_ANEG 0x0200 /* Reverse auto-negotiation */
#define E100_82552_ANEG_NOW 0x0400 /* Auto-negotiate now */
-static int e100_suspend(struct pci_dev *pdev, pm_message_t state)
+static void __e100_shutdown(struct pci_dev *pdev, bool *enable_wake)
{
struct net_device *netdev = pci_get_drvdata(pdev);
struct nic *nic = netdev_priv(netdev);
@@ -2749,19 +2749,32 @@
E100_82552_SMARTSPEED, smartspeed |
E100_82552_REV_ANEG | E100_82552_ANEG_NOW);
}
- if (pci_enable_wake(pdev, PCI_D3cold, true))
- pci_enable_wake(pdev, PCI_D3hot, true);
+ *enable_wake = true;
} else {
- pci_enable_wake(pdev, PCI_D3hot, false);
+ *enable_wake = false;
}
pci_disable_device(pdev);
- pci_set_power_state(pdev, PCI_D3hot);
+}
- return 0;
+static int __e100_power_off(struct pci_dev *pdev, bool wake)
+{
+ if (wake) {
+ return pci_prepare_to_sleep(pdev);
+ } else {
+ pci_wake_from_d3(pdev, false);
+ return pci_set_power_state(pdev, PCI_D3hot);
+ }
}
#ifdef CONFIG_PM
+static int e100_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+ bool wake;
+ __e100_shutdown(pdev, &wake);
+ return __e100_power_off(pdev, wake);
+}
+
static int e100_resume(struct pci_dev *pdev)
{
struct net_device *netdev = pci_get_drvdata(pdev);
@@ -2792,7 +2805,10 @@
static void e100_shutdown(struct pci_dev *pdev)
{
- e100_suspend(pdev, PMSG_SUSPEND);
+ bool wake;
+ __e100_shutdown(pdev, &wake);
+ if (system_state == SYSTEM_POWER_OFF)
+ __e100_power_off(pdev, wake);
}
/* ------------------ PCI Error Recovery infrastructure -------------- */
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
index 7b1a652..1b2e435 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -354,9 +354,6 @@
/* What hooks you will enter on */
unsigned int valid_hooks;
- /* Lock for the curtain */
- struct mutex lock;
-
/* Man behind the curtain... */
struct xt_table_info *private;
@@ -434,8 +431,74 @@
extern struct xt_table_info *xt_alloc_table_info(unsigned int size);
extern void xt_free_table_info(struct xt_table_info *info);
-extern void xt_table_entry_swap_rcu(struct xt_table_info *old,
- struct xt_table_info *new);
+
+/*
+ * Per-CPU spinlock associated with per-cpu table entries, and
+ * with a counter for the "reading" side that allows a recursive
+ * reader to avoid taking the lock and deadlocking.
+ *
+ * "reading" is used by ip/arp/ip6 tables rule processing which runs per-cpu.
+ * It needs to ensure that the rules are not being changed while the packet
+ * is being processed. In some cases, the read lock will be acquired
+ * twice on the same CPU; this is okay because of the count.
+ *
+ * "writing" is used when reading counters.
+ * During replace any readers that are using the old tables have to complete
+ * before freeing the old table. This is handled by the write locking
+ * necessary for reading the counters.
+ */
+struct xt_info_lock {
+ spinlock_t lock;
+ unsigned char readers;
+};
+DECLARE_PER_CPU(struct xt_info_lock, xt_info_locks);
+
+/*
+ * Note: we need to ensure that preemption is disabled before acquiring
+ * the per-cpu-variable, so we do it as a two step process rather than
+ * using "spin_lock_bh()".
+ *
+ * We _also_ need to disable bottom half processing before updating our
+ * nesting count, to make sure that the only kind of re-entrancy is this
+ * code being called by itself: since the count+lock is not an atomic
+ * operation, we can allow no races.
+ *
+ * _Only_ that special combination of being per-cpu and never getting
+ * re-entered asynchronously means that the count is safe.
+ */
+static inline void xt_info_rdlock_bh(void)
+{
+ struct xt_info_lock *lock;
+
+ local_bh_disable();
+ lock = &__get_cpu_var(xt_info_locks);
+ if (!lock->readers++)
+ spin_lock(&lock->lock);
+}
+
+static inline void xt_info_rdunlock_bh(void)
+{
+ struct xt_info_lock *lock = &__get_cpu_var(xt_info_locks);
+
+ if (!--lock->readers)
+ spin_unlock(&lock->lock);
+ local_bh_enable();
+}
+
+/*
+ * The "writer" side needs to get exclusive access to the lock,
+ * regardless of readers. This must be called with bottom half
+ * processing (and thus also preemption) disabled.
+ */
+static inline void xt_info_wrlock(unsigned int cpu)
+{
+ spin_lock(&per_cpu(xt_info_locks, cpu).lock);
+}
+
+static inline void xt_info_wrunlock(unsigned int cpu)
+{
+ spin_unlock(&per_cpu(xt_info_locks, cpu).lock);
+}
/*
* This helper is performance critical and must be inlined
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index f69f015..ed3aea1 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -101,6 +101,7 @@
/* HCI timeouts */
#define HCI_CONNECT_TIMEOUT (40000) /* 40 seconds */
#define HCI_DISCONN_TIMEOUT (2000) /* 2 seconds */
+#define HCI_PAIRING_TIMEOUT (60000) /* 60 seconds */
#define HCI_IDLE_TIMEOUT (6000) /* 6 seconds */
#define HCI_INIT_TIMEOUT (10000) /* 10 seconds */
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 01f9316..be5bd71 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -171,6 +171,7 @@
__u8 auth_type;
__u8 sec_level;
__u8 power_save;
+ __u16 disc_timeout;
unsigned long pend;
unsigned int sent;
@@ -180,7 +181,8 @@
struct timer_list disc_timer;
struct timer_list idle_timer;
- struct work_struct work;
+ struct work_struct work_add;
+ struct work_struct work_del;
struct device dev;
@@ -348,9 +350,9 @@
if (conn->type == ACL_LINK) {
del_timer(&conn->idle_timer);
if (conn->state == BT_CONNECTED) {
- timeo = msecs_to_jiffies(HCI_DISCONN_TIMEOUT);
+ timeo = msecs_to_jiffies(conn->disc_timeout);
if (!conn->out)
- timeo *= 5;
+ timeo *= 2;
} else
timeo = msecs_to_jiffies(10);
} else
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 1181db0..375f4b4 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -215,6 +215,7 @@
conn->state = BT_OPEN;
conn->power_save = 1;
+ conn->disc_timeout = HCI_DISCONN_TIMEOUT;
switch (type) {
case ACL_LINK:
@@ -424,12 +425,9 @@
if (sec_level == BT_SECURITY_SDP)
return 1;
- if (sec_level == BT_SECURITY_LOW) {
- if (conn->ssp_mode > 0 && conn->hdev->ssp_mode > 0)
- return hci_conn_auth(conn, sec_level, auth_type);
- else
- return 1;
- }
+ if (sec_level == BT_SECURITY_LOW &&
+ (!conn->ssp_mode || !conn->hdev->ssp_mode))
+ return 1;
if (conn->link_mode & HCI_LM_ENCRYPT)
return hci_conn_auth(conn, sec_level, auth_type);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 15f40ea..4e7cb88 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -883,6 +883,7 @@
if (conn->type == ACL_LINK) {
conn->state = BT_CONFIG;
hci_conn_hold(conn);
+ conn->disc_timeout = HCI_DISCONN_TIMEOUT;
} else
conn->state = BT_CONNECTED;
@@ -1063,9 +1064,14 @@
hci_proto_connect_cfm(conn, ev->status);
hci_conn_put(conn);
}
- } else
+ } else {
hci_auth_cfm(conn, ev->status);
+ hci_conn_hold(conn);
+ conn->disc_timeout = HCI_DISCONN_TIMEOUT;
+ hci_conn_put(conn);
+ }
+
if (test_bit(HCI_CONN_ENCRYPT_PEND, &conn->pend)) {
if (!ev->status) {
struct hci_cp_set_conn_encrypt cp;
@@ -1479,7 +1485,21 @@
static inline void hci_pin_code_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
{
+ struct hci_ev_pin_code_req *ev = (void *) skb->data;
+ struct hci_conn *conn;
+
BT_DBG("%s", hdev->name);
+
+ hci_dev_lock(hdev);
+
+ conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
+ if (conn) {
+ hci_conn_hold(conn);
+ conn->disc_timeout = HCI_PAIRING_TIMEOUT;
+ hci_conn_put(conn);
+ }
+
+ hci_dev_unlock(hdev);
}
static inline void hci_link_key_request_evt(struct hci_dev *hdev, struct sk_buff *skb)
@@ -1489,7 +1509,21 @@
static inline void hci_link_key_notify_evt(struct hci_dev *hdev, struct sk_buff *skb)
{
+ struct hci_ev_link_key_notify *ev = (void *) skb->data;
+ struct hci_conn *conn;
+
BT_DBG("%s", hdev->name);
+
+ hci_dev_lock(hdev);
+
+ conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &ev->bdaddr);
+ if (conn) {
+ hci_conn_hold(conn);
+ conn->disc_timeout = HCI_DISCONN_TIMEOUT;
+ hci_conn_put(conn);
+ }
+
+ hci_dev_unlock(hdev);
}
static inline void hci_clock_offset_evt(struct hci_dev *hdev, struct sk_buff *skb)
diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c
index ed82796..b7c5108 100644
--- a/net/bluetooth/hci_sysfs.c
+++ b/net/bluetooth/hci_sysfs.c
@@ -9,8 +9,7 @@
struct class *bt_class = NULL;
EXPORT_SYMBOL_GPL(bt_class);
-static struct workqueue_struct *btaddconn;
-static struct workqueue_struct *btdelconn;
+static struct workqueue_struct *bluetooth;
static inline char *link_typetostr(int type)
{
@@ -88,9 +87,10 @@
static void add_conn(struct work_struct *work)
{
- struct hci_conn *conn = container_of(work, struct hci_conn, work);
+ struct hci_conn *conn = container_of(work, struct hci_conn, work_add);
- flush_workqueue(btdelconn);
+ /* ensure previous add/del is complete */
+ flush_workqueue(bluetooth);
if (device_add(&conn->dev) < 0) {
BT_ERR("Failed to register connection device");
@@ -114,9 +114,9 @@
device_initialize(&conn->dev);
- INIT_WORK(&conn->work, add_conn);
+ INIT_WORK(&conn->work_add, add_conn);
- queue_work(btaddconn, &conn->work);
+ queue_work(bluetooth, &conn->work_add);
}
/*
@@ -131,9 +131,12 @@
static void del_conn(struct work_struct *work)
{
- struct hci_conn *conn = container_of(work, struct hci_conn, work);
+ struct hci_conn *conn = container_of(work, struct hci_conn, work_del);
struct hci_dev *hdev = conn->hdev;
+ /* ensure previous add/del is complete */
+ flush_workqueue(bluetooth);
+
while (1) {
struct device *dev;
@@ -156,9 +159,9 @@
if (!device_is_registered(&conn->dev))
return;
- INIT_WORK(&conn->work, del_conn);
+ INIT_WORK(&conn->work_del, del_conn);
- queue_work(btdelconn, &conn->work);
+ queue_work(bluetooth, &conn->work_del);
}
static inline char *host_typetostr(int type)
@@ -435,20 +438,13 @@
int __init bt_sysfs_init(void)
{
- btaddconn = create_singlethread_workqueue("btaddconn");
- if (!btaddconn)
+ bluetooth = create_singlethread_workqueue("bluetooth");
+ if (!bluetooth)
return -ENOMEM;
- btdelconn = create_singlethread_workqueue("btdelconn");
- if (!btdelconn) {
- destroy_workqueue(btaddconn);
- return -ENOMEM;
- }
-
bt_class = class_create(THIS_MODULE, "bluetooth");
if (IS_ERR(bt_class)) {
- destroy_workqueue(btdelconn);
- destroy_workqueue(btaddconn);
+ destroy_workqueue(bluetooth);
return PTR_ERR(bt_class);
}
@@ -457,8 +453,7 @@
void bt_sysfs_cleanup(void)
{
- destroy_workqueue(btaddconn);
- destroy_workqueue(btdelconn);
+ destroy_workqueue(bluetooth);
class_destroy(bt_class);
}
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 5ba533d..831fe18 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -253,9 +253,9 @@
indev = in ? in->name : nulldevname;
outdev = out ? out->name : nulldevname;
- rcu_read_lock_bh();
- private = rcu_dereference(table->private);
- table_base = rcu_dereference(private->entries[smp_processor_id()]);
+ xt_info_rdlock_bh();
+ private = table->private;
+ table_base = private->entries[smp_processor_id()];
e = get_entry(table_base, private->hook_entry[hook]);
back = get_entry(table_base, private->underflow[hook]);
@@ -273,6 +273,7 @@
hdr_len = sizeof(*arp) + (2 * sizeof(struct in_addr)) +
(2 * skb->dev->addr_len);
+
ADD_COUNTER(e->counters, hdr_len, 1);
t = arpt_get_target(e);
@@ -328,8 +329,7 @@
e = (void *)e + e->next_offset;
}
} while (!hotdrop);
-
- rcu_read_unlock_bh();
+ xt_info_rdunlock_bh();
if (hotdrop)
return NF_DROP;
@@ -711,9 +711,12 @@
/* Instead of clearing (by a previous call to memset())
* the counters and using adds, we set the counters
* with data used by 'current' CPU
- * We dont care about preemption here.
+ *
+ * Bottom half has to be disabled to prevent deadlock
+ * if new softirq were to run and call ipt_do_table
*/
- curcpu = raw_smp_processor_id();
+ local_bh_disable();
+ curcpu = smp_processor_id();
i = 0;
ARPT_ENTRY_ITERATE(t->entries[curcpu],
@@ -726,73 +729,22 @@
if (cpu == curcpu)
continue;
i = 0;
+ xt_info_wrlock(cpu);
ARPT_ENTRY_ITERATE(t->entries[cpu],
t->size,
add_entry_to_counter,
counters,
&i);
+ xt_info_wrunlock(cpu);
}
-}
-
-
-/* We're lazy, and add to the first CPU; overflow works its fey magic
- * and everything is OK. */
-static int
-add_counter_to_entry(struct arpt_entry *e,
- const struct xt_counters addme[],
- unsigned int *i)
-{
- ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
-
- (*i)++;
- return 0;
-}
-
-/* Take values from counters and add them back onto the current cpu */
-static void put_counters(struct xt_table_info *t,
- const struct xt_counters counters[])
-{
- unsigned int i, cpu;
-
- local_bh_disable();
- cpu = smp_processor_id();
- i = 0;
- ARPT_ENTRY_ITERATE(t->entries[cpu],
- t->size,
- add_counter_to_entry,
- counters,
- &i);
local_bh_enable();
}
-static inline int
-zero_entry_counter(struct arpt_entry *e, void *arg)
-{
- e->counters.bcnt = 0;
- e->counters.pcnt = 0;
- return 0;
-}
-
-static void
-clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
-{
- unsigned int cpu;
- const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];
-
- memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
- for_each_possible_cpu(cpu) {
- memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
- ARPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
- zero_entry_counter, NULL);
- }
-}
-
static struct xt_counters *alloc_counters(struct xt_table *table)
{
unsigned int countersize;
struct xt_counters *counters;
struct xt_table_info *private = table->private;
- struct xt_table_info *info;
/* We need atomic snapshot of counters: rest doesn't change
* (other than comefrom, which userspace doesn't care
@@ -802,30 +754,11 @@
counters = vmalloc_node(countersize, numa_node_id());
if (counters == NULL)
- goto nomem;
+ return ERR_PTR(-ENOMEM);
- info = xt_alloc_table_info(private->size);
- if (!info)
- goto free_counters;
-
- clone_counters(info, private);
-
- mutex_lock(&table->lock);
- xt_table_entry_swap_rcu(private, info);
- synchronize_net(); /* Wait until smoke has cleared */
-
- get_counters(info, counters);
- put_counters(private, counters);
- mutex_unlock(&table->lock);
-
- xt_free_table_info(info);
+ get_counters(private, counters);
return counters;
-
- free_counters:
- vfree(counters);
- nomem:
- return ERR_PTR(-ENOMEM);
}
static int copy_entries_to_user(unsigned int total_size,
@@ -1094,8 +1027,9 @@
(newinfo->number <= oldinfo->initial_entries))
module_put(t->me);
- /* Get the old counters. */
+ /* Get the old counters, and synchronize with replace */
get_counters(oldinfo, counters);
+
/* Decrease module usage counts and free resource */
loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
ARPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
@@ -1165,10 +1099,23 @@
return ret;
}
+/* We're lazy, and add to the first CPU; overflow works its fey magic
+ * and everything is OK. */
+static int
+add_counter_to_entry(struct arpt_entry *e,
+ const struct xt_counters addme[],
+ unsigned int *i)
+{
+ ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+
+ (*i)++;
+ return 0;
+}
+
static int do_add_counters(struct net *net, void __user *user, unsigned int len,
int compat)
{
- unsigned int i;
+ unsigned int i, curcpu;
struct xt_counters_info tmp;
struct xt_counters *paddc;
unsigned int num_counters;
@@ -1224,26 +1171,26 @@
goto free;
}
- mutex_lock(&t->lock);
+ local_bh_disable();
private = t->private;
if (private->number != num_counters) {
ret = -EINVAL;
goto unlock_up_free;
}
- preempt_disable();
i = 0;
/* Choose the copy that is on our node */
- loc_cpu_entry = private->entries[smp_processor_id()];
+ curcpu = smp_processor_id();
+ loc_cpu_entry = private->entries[curcpu];
+ xt_info_wrlock(curcpu);
ARPT_ENTRY_ITERATE(loc_cpu_entry,
private->size,
add_counter_to_entry,
paddc,
&i);
- preempt_enable();
+ xt_info_wrunlock(curcpu);
unlock_up_free:
- mutex_unlock(&t->lock);
-
+ local_bh_enable();
xt_table_unlock(t);
module_put(t->me);
free:
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 810c0b6..2ec8d72 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -338,10 +338,9 @@
tgpar.hooknum = hook;
IP_NF_ASSERT(table->valid_hooks & (1 << hook));
-
- rcu_read_lock_bh();
- private = rcu_dereference(table->private);
- table_base = rcu_dereference(private->entries[smp_processor_id()]);
+ xt_info_rdlock_bh();
+ private = table->private;
+ table_base = private->entries[smp_processor_id()];
e = get_entry(table_base, private->hook_entry[hook]);
@@ -436,8 +435,7 @@
e = (void *)e + e->next_offset;
}
} while (!hotdrop);
-
- rcu_read_unlock_bh();
+ xt_info_rdunlock_bh();
#ifdef DEBUG_ALLOW_ALL
return NF_ACCEPT;
@@ -896,10 +894,13 @@
/* Instead of clearing (by a previous call to memset())
* the counters and using adds, we set the counters
- * with data used by 'current' CPU
- * We dont care about preemption here.
+ * with data used by 'current' CPU.
+ *
+ * Bottom half has to be disabled to prevent deadlock
+ * if new softirq were to run and call ipt_do_table
*/
- curcpu = raw_smp_processor_id();
+ local_bh_disable();
+ curcpu = smp_processor_id();
i = 0;
IPT_ENTRY_ITERATE(t->entries[curcpu],
@@ -912,74 +913,22 @@
if (cpu == curcpu)
continue;
i = 0;
+ xt_info_wrlock(cpu);
IPT_ENTRY_ITERATE(t->entries[cpu],
t->size,
add_entry_to_counter,
counters,
&i);
+ xt_info_wrunlock(cpu);
}
-
-}
-
-/* We're lazy, and add to the first CPU; overflow works its fey magic
- * and everything is OK. */
-static int
-add_counter_to_entry(struct ipt_entry *e,
- const struct xt_counters addme[],
- unsigned int *i)
-{
- ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
-
- (*i)++;
- return 0;
-}
-
-/* Take values from counters and add them back onto the current cpu */
-static void put_counters(struct xt_table_info *t,
- const struct xt_counters counters[])
-{
- unsigned int i, cpu;
-
- local_bh_disable();
- cpu = smp_processor_id();
- i = 0;
- IPT_ENTRY_ITERATE(t->entries[cpu],
- t->size,
- add_counter_to_entry,
- counters,
- &i);
local_bh_enable();
}
-
-static inline int
-zero_entry_counter(struct ipt_entry *e, void *arg)
-{
- e->counters.bcnt = 0;
- e->counters.pcnt = 0;
- return 0;
-}
-
-static void
-clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
-{
- unsigned int cpu;
- const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];
-
- memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
- for_each_possible_cpu(cpu) {
- memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
- IPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
- zero_entry_counter, NULL);
- }
-}
-
static struct xt_counters * alloc_counters(struct xt_table *table)
{
unsigned int countersize;
struct xt_counters *counters;
struct xt_table_info *private = table->private;
- struct xt_table_info *info;
/* We need atomic snapshot of counters: rest doesn't change
(other than comefrom, which userspace doesn't care
@@ -988,30 +937,11 @@
counters = vmalloc_node(countersize, numa_node_id());
if (counters == NULL)
- goto nomem;
+ return ERR_PTR(-ENOMEM);
- info = xt_alloc_table_info(private->size);
- if (!info)
- goto free_counters;
-
- clone_counters(info, private);
-
- mutex_lock(&table->lock);
- xt_table_entry_swap_rcu(private, info);
- synchronize_net(); /* Wait until smoke has cleared */
-
- get_counters(info, counters);
- put_counters(private, counters);
- mutex_unlock(&table->lock);
-
- xt_free_table_info(info);
+ get_counters(private, counters);
return counters;
-
- free_counters:
- vfree(counters);
- nomem:
- return ERR_PTR(-ENOMEM);
}
static int
@@ -1306,8 +1236,9 @@
(newinfo->number <= oldinfo->initial_entries))
module_put(t->me);
- /* Get the old counters. */
+ /* Get the old counters, and synchronize with replace */
get_counters(oldinfo, counters);
+
/* Decrease module usage counts and free resource */
loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
@@ -1377,11 +1308,23 @@
return ret;
}
+/* We're lazy, and add to the first CPU; overflow works its fey magic
+ * and everything is OK. */
+static int
+add_counter_to_entry(struct ipt_entry *e,
+ const struct xt_counters addme[],
+ unsigned int *i)
+{
+ ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+
+ (*i)++;
+ return 0;
+}
static int
do_add_counters(struct net *net, void __user *user, unsigned int len, int compat)
{
- unsigned int i;
+ unsigned int i, curcpu;
struct xt_counters_info tmp;
struct xt_counters *paddc;
unsigned int num_counters;
@@ -1437,25 +1380,26 @@
goto free;
}
- mutex_lock(&t->lock);
+ local_bh_disable();
private = t->private;
if (private->number != num_counters) {
ret = -EINVAL;
goto unlock_up_free;
}
- preempt_disable();
i = 0;
/* Choose the copy that is on our node */
- loc_cpu_entry = private->entries[raw_smp_processor_id()];
+ curcpu = smp_processor_id();
+ loc_cpu_entry = private->entries[curcpu];
+ xt_info_wrlock(curcpu);
IPT_ENTRY_ITERATE(loc_cpu_entry,
private->size,
add_counter_to_entry,
paddc,
&i);
- preempt_enable();
+ xt_info_wrunlock(curcpu);
unlock_up_free:
- mutex_unlock(&t->lock);
+ local_bh_enable();
xt_table_unlock(t);
module_put(t->me);
free:
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 800ae85..219e165 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -365,9 +365,9 @@
IP_NF_ASSERT(table->valid_hooks & (1 << hook));
- rcu_read_lock_bh();
- private = rcu_dereference(table->private);
- table_base = rcu_dereference(private->entries[smp_processor_id()]);
+ xt_info_rdlock_bh();
+ private = table->private;
+ table_base = private->entries[smp_processor_id()];
e = get_entry(table_base, private->hook_entry[hook]);
@@ -466,7 +466,7 @@
#ifdef CONFIG_NETFILTER_DEBUG
((struct ip6t_entry *)table_base)->comefrom = NETFILTER_LINK_POISON;
#endif
- rcu_read_unlock_bh();
+ xt_info_rdunlock_bh();
#ifdef DEBUG_ALLOW_ALL
return NF_ACCEPT;
@@ -926,9 +926,12 @@
/* Instead of clearing (by a previous call to memset())
* the counters and using adds, we set the counters
* with data used by 'current' CPU
- * We dont care about preemption here.
+ *
+ * Bottom half has to be disabled to prevent deadlock
+ * if new softirq were to run and call ipt_do_table
*/
- curcpu = raw_smp_processor_id();
+ local_bh_disable();
+ curcpu = smp_processor_id();
i = 0;
IP6T_ENTRY_ITERATE(t->entries[curcpu],
@@ -941,72 +944,22 @@
if (cpu == curcpu)
continue;
i = 0;
+ xt_info_wrlock(cpu);
IP6T_ENTRY_ITERATE(t->entries[cpu],
t->size,
add_entry_to_counter,
counters,
&i);
+ xt_info_wrunlock(cpu);
}
-}
-
-/* We're lazy, and add to the first CPU; overflow works its fey magic
- * and everything is OK. */
-static int
-add_counter_to_entry(struct ip6t_entry *e,
- const struct xt_counters addme[],
- unsigned int *i)
-{
- ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
-
- (*i)++;
- return 0;
-}
-
-/* Take values from counters and add them back onto the current cpu */
-static void put_counters(struct xt_table_info *t,
- const struct xt_counters counters[])
-{
- unsigned int i, cpu;
-
- local_bh_disable();
- cpu = smp_processor_id();
- i = 0;
- IP6T_ENTRY_ITERATE(t->entries[cpu],
- t->size,
- add_counter_to_entry,
- counters,
- &i);
local_bh_enable();
}
-static inline int
-zero_entry_counter(struct ip6t_entry *e, void *arg)
-{
- e->counters.bcnt = 0;
- e->counters.pcnt = 0;
- return 0;
-}
-
-static void
-clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
-{
- unsigned int cpu;
- const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];
-
- memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
- for_each_possible_cpu(cpu) {
- memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
- IP6T_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
- zero_entry_counter, NULL);
- }
-}
-
static struct xt_counters *alloc_counters(struct xt_table *table)
{
unsigned int countersize;
struct xt_counters *counters;
struct xt_table_info *private = table->private;
- struct xt_table_info *info;
/* We need atomic snapshot of counters: rest doesn't change
(other than comefrom, which userspace doesn't care
@@ -1015,30 +968,11 @@
counters = vmalloc_node(countersize, numa_node_id());
if (counters == NULL)
- goto nomem;
+ return ERR_PTR(-ENOMEM);
- info = xt_alloc_table_info(private->size);
- if (!info)
- goto free_counters;
-
- clone_counters(info, private);
-
- mutex_lock(&table->lock);
- xt_table_entry_swap_rcu(private, info);
- synchronize_net(); /* Wait until smoke has cleared */
-
- get_counters(info, counters);
- put_counters(private, counters);
- mutex_unlock(&table->lock);
-
- xt_free_table_info(info);
+ get_counters(private, counters);
return counters;
-
- free_counters:
- vfree(counters);
- nomem:
- return ERR_PTR(-ENOMEM);
}
static int
@@ -1334,8 +1268,9 @@
(newinfo->number <= oldinfo->initial_entries))
module_put(t->me);
- /* Get the old counters. */
+ /* Get the old counters, and synchronize with replace */
get_counters(oldinfo, counters);
+
/* Decrease module usage counts and free resource */
loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
IP6T_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
@@ -1405,11 +1340,24 @@
return ret;
}
+/* We're lazy, and add to the first CPU; overflow works its fey magic
+ * and everything is OK. */
+static int
+add_counter_to_entry(struct ip6t_entry *e,
+ const struct xt_counters addme[],
+ unsigned int *i)
+{
+ ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
+
+ (*i)++;
+ return 0;
+}
+
static int
do_add_counters(struct net *net, void __user *user, unsigned int len,
int compat)
{
- unsigned int i;
+ unsigned int i, curcpu;
struct xt_counters_info tmp;
struct xt_counters *paddc;
unsigned int num_counters;
@@ -1465,25 +1413,28 @@
goto free;
}
- mutex_lock(&t->lock);
+
+ local_bh_disable();
private = t->private;
if (private->number != num_counters) {
ret = -EINVAL;
goto unlock_up_free;
}
- preempt_disable();
i = 0;
/* Choose the copy that is on our node */
- loc_cpu_entry = private->entries[raw_smp_processor_id()];
+ curcpu = smp_processor_id();
+ xt_info_wrlock(curcpu);
+ loc_cpu_entry = private->entries[curcpu];
IP6T_ENTRY_ITERATE(loc_cpu_entry,
private->size,
add_counter_to_entry,
paddc,
&i);
- preempt_enable();
+ xt_info_wrunlock(curcpu);
+
unlock_up_free:
- mutex_unlock(&t->lock);
+ local_bh_enable();
xt_table_unlock(t);
module_put(t->me);
free:
diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c
index 509a956..150e5cf 100644
--- a/net/netfilter/x_tables.c
+++ b/net/netfilter/x_tables.c
@@ -625,20 +625,6 @@
}
EXPORT_SYMBOL(xt_free_table_info);
-void xt_table_entry_swap_rcu(struct xt_table_info *oldinfo,
- struct xt_table_info *newinfo)
-{
- unsigned int cpu;
-
- for_each_possible_cpu(cpu) {
- void *p = oldinfo->entries[cpu];
- rcu_assign_pointer(oldinfo->entries[cpu], newinfo->entries[cpu]);
- newinfo->entries[cpu] = p;
- }
-
-}
-EXPORT_SYMBOL_GPL(xt_table_entry_swap_rcu);
-
/* Find table by name, grabs mutex & ref. Returns ERR_PTR() on error. */
struct xt_table *xt_find_table_lock(struct net *net, u_int8_t af,
const char *name)
@@ -676,32 +662,43 @@
EXPORT_SYMBOL_GPL(xt_compat_unlock);
#endif
+DEFINE_PER_CPU(struct xt_info_lock, xt_info_locks);
+EXPORT_PER_CPU_SYMBOL_GPL(xt_info_locks);
+
+
struct xt_table_info *
xt_replace_table(struct xt_table *table,
unsigned int num_counters,
struct xt_table_info *newinfo,
int *error)
{
- struct xt_table_info *oldinfo, *private;
+ struct xt_table_info *private;
/* Do the substitution. */
- mutex_lock(&table->lock);
+ local_bh_disable();
private = table->private;
+
/* Check inside lock: is the old number correct? */
if (num_counters != private->number) {
duprintf("num_counters != table->private->number (%u/%u)\n",
num_counters, private->number);
- mutex_unlock(&table->lock);
+ local_bh_enable();
*error = -EAGAIN;
return NULL;
}
- oldinfo = private;
- rcu_assign_pointer(table->private, newinfo);
- newinfo->initial_entries = oldinfo->initial_entries;
- mutex_unlock(&table->lock);
- synchronize_net();
- return oldinfo;
+ table->private = newinfo;
+ newinfo->initial_entries = private->initial_entries;
+
+ /*
+ * Even though table entries have now been swapped, other CPU's
+ * may still be using the old entries. This is okay, because
+ * resynchronization happens because of the locking done
+ * during the get_counters() routine.
+ */
+ local_bh_enable();
+
+ return private;
}
EXPORT_SYMBOL_GPL(xt_replace_table);
@@ -734,7 +731,6 @@
/* Simplifies replace_table code. */
table->private = bootstrap;
- mutex_init(&table->lock);
if (!xt_replace_table(table, 0, newinfo, &ret))
goto unlock;
@@ -1147,7 +1143,14 @@
static int __init xt_init(void)
{
- int i, rv;
+ unsigned int i;
+ int rv;
+
+ for_each_possible_cpu(i) {
+ struct xt_info_lock *lock = &per_cpu(xt_info_locks, i);
+ spin_lock_init(&lock->lock);
+ lock->readers = 0;
+ }
xt = kmalloc(sizeof(struct xt_af) * NFPROTO_NUMPROTO, GFP_KERNEL);
if (!xt)