[NET]: make netlink user -> kernel interface synchronious
This patch make processing netlink user -> kernel messages synchronious.
This change was inspired by the talk with Alexey Kuznetsov about current
netlink messages processing. He says that he was badly wrong when introduced
asynchronious user -> kernel communication.
The call netlink_unicast is the only path to send message to the kernel
netlink socket. But, unfortunately, it is also used to send data to the
user.
Before this change the user message has been attached to the socket queue
and sk->sk_data_ready was called. The process has been blocked until all
pending messages were processed. The bad thing is that this processing
may occur in the arbitrary process context.
This patch changes nlk->data_ready callback to get 1 skb and force packet
processing right in the netlink_unicast.
Kernel -> user path in netlink_unicast remains untouched.
EINTR processing for in netlink_run_queue was changed. It forces rtnl_lock
drop, but the process remains in the cycle until the message will be fully
processed. So, there is no need to use this kludges now.
Signed-off-by: Denis V. Lunev <den@openvz.org>
Acked-by: Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index f823ca3..a5cba23 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -62,6 +62,9 @@
#define FIB_TABLE_HASHSZ 256
static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ];
+static struct sock *fibnl = NULL;
+
+
struct fib_table *fib_new_table(u32 id)
{
struct fib_table *tb;
@@ -811,13 +814,13 @@
pid = NETLINK_CB(skb).pid; /* pid of sending process */
NETLINK_CB(skb).pid = 0; /* from kernel */
NETLINK_CB(skb).dst_group = 0; /* unicast */
- netlink_unicast(sk, skb, pid, MSG_DONTWAIT);
+ netlink_unicast(fibnl, skb, pid, MSG_DONTWAIT);
}
static void nl_fib_lookup_init(void)
{
- netlink_kernel_create(&init_net, NETLINK_FIB_LOOKUP, 0, nl_fib_input,
- NULL, THIS_MODULE);
+ fibnl = netlink_kernel_create(&init_net, NETLINK_FIB_LOOKUP, 0,
+ nl_fib_input, NULL, THIS_MODULE);
}
static void fib_disable_ip(struct net_device *dev, int force)
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index b04a6ee..7eb83eb 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -839,15 +839,11 @@
static DEFINE_MUTEX(inet_diag_mutex);
-static void inet_diag_rcv(struct sock *sk, int len)
+static void inet_diag_rcv(struct sk_buff *skb)
{
- unsigned int qlen = 0;
-
- do {
- mutex_lock(&inet_diag_mutex);
- qlen = netlink_run_queue(sk, qlen, &inet_diag_rcv_msg);
- mutex_unlock(&inet_diag_mutex);
- } while (qlen);
+ mutex_lock(&inet_diag_mutex);
+ netlink_rcv_skb(skb, &inet_diag_rcv_msg);
+ mutex_unlock(&inet_diag_mutex);
}
static DEFINE_SPINLOCK(inet_diag_register_lock);
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index aaa3f5c..23cbfc7 100644
--- a/net/ipv4/netfilter/ip_queue.c
+++ b/net/ipv4/netfilter/ip_queue.c
@@ -475,7 +475,7 @@
#define RCV_SKB_FAIL(err) do { netlink_ack(skb, nlh, (err)); return; } while (0)
static inline void
-ipq_rcv_skb(struct sk_buff *skb)
+__ipq_rcv_skb(struct sk_buff *skb)
{
int status, type, pid, flags, nlmsglen, skblen;
struct nlmsghdr *nlh;
@@ -533,19 +533,10 @@
}
static void
-ipq_rcv_sk(struct sock *sk, int len)
+ipq_rcv_skb(struct sk_buff *skb)
{
- struct sk_buff *skb;
- unsigned int qlen;
-
mutex_lock(&ipqnl_mutex);
-
- for (qlen = skb_queue_len(&sk->sk_receive_queue); qlen; qlen--) {
- skb = skb_dequeue(&sk->sk_receive_queue);
- ipq_rcv_skb(skb);
- kfree_skb(skb);
- }
-
+ __ipq_rcv_skb(skb);
mutex_unlock(&ipqnl_mutex);
}
@@ -670,7 +661,7 @@
netlink_register_notifier(&ipq_nl_notifier);
ipqnl = netlink_kernel_create(&init_net, NETLINK_FIREWALL, 0,
- ipq_rcv_sk, NULL, THIS_MODULE);
+ ipq_rcv_skb, NULL, THIS_MODULE);
if (ipqnl == NULL) {
printk(KERN_ERR "ip_queue: failed to create netlink socket\n");
goto cleanup_netlink_notifier;