fasync: RCU and fine grained locking
kill_fasync() uses a central rwlock, candidate for RCU conversion, to
avoid cache line ping pongs on SMP.
fasync_remove_entry() and fasync_add_entry() can disable IRQS on a short
section instead during whole list scan.
Use a spinlock per fasync_struct to synchronize kill_fasync_rcu() and
fasync_{remove|add}_entry(). This spinlock is IRQ safe, so sock_fasync()
doesnt need its own implementation and can use fasync_helper(), to
reduce code size and complexity.
We can remove __kill_fasync() direct use in net/socket.c, and rename it
to kill_fasync_rcu().
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/net/socket.c b/net/socket.c
index 35bc198..9822081 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1067,78 +1067,27 @@
* 1. fasync_list is modified only under process context socket lock
* i.e. under semaphore.
* 2. fasync_list is used under read_lock(&sk->sk_callback_lock)
- * or under socket lock.
- * 3. fasync_list can be used from softirq context, so that
- * modification under socket lock have to be enhanced with
- * write_lock_bh(&sk->sk_callback_lock).
- * --ANK (990710)
+ * or under socket lock
*/
static int sock_fasync(int fd, struct file *filp, int on)
{
- struct fasync_struct *fa, *fna = NULL, **prev;
- struct socket *sock;
- struct sock *sk;
+ struct socket *sock = filp->private_data;
+ struct sock *sk = sock->sk;
- if (on) {
- fna = kmalloc(sizeof(struct fasync_struct), GFP_KERNEL);
- if (fna == NULL)
- return -ENOMEM;
- }
-
- sock = filp->private_data;
-
- sk = sock->sk;
- if (sk == NULL) {
- kfree(fna);
+ if (sk == NULL)
return -EINVAL;
- }
lock_sock(sk);
- spin_lock(&filp->f_lock);
- if (on)
- filp->f_flags |= FASYNC;
+ fasync_helper(fd, filp, on, &sock->fasync_list);
+
+ if (!sock->fasync_list)
+ sock_reset_flag(sk, SOCK_FASYNC);
else
- filp->f_flags &= ~FASYNC;
- spin_unlock(&filp->f_lock);
-
- prev = &(sock->fasync_list);
-
- for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev)
- if (fa->fa_file == filp)
- break;
-
- if (on) {
- if (fa != NULL) {
- write_lock_bh(&sk->sk_callback_lock);
- fa->fa_fd = fd;
- write_unlock_bh(&sk->sk_callback_lock);
-
- kfree(fna);
- goto out;
- }
- fna->fa_file = filp;
- fna->fa_fd = fd;
- fna->magic = FASYNC_MAGIC;
- fna->fa_next = sock->fasync_list;
- write_lock_bh(&sk->sk_callback_lock);
- sock->fasync_list = fna;
sock_set_flag(sk, SOCK_FASYNC);
- write_unlock_bh(&sk->sk_callback_lock);
- } else {
- if (fa != NULL) {
- write_lock_bh(&sk->sk_callback_lock);
- *prev = fa->fa_next;
- if (!sock->fasync_list)
- sock_reset_flag(sk, SOCK_FASYNC);
- write_unlock_bh(&sk->sk_callback_lock);
- kfree(fa);
- }
- }
-out:
- release_sock(sock->sk);
+ release_sock(sk);
return 0;
}
@@ -1159,10 +1108,10 @@
/* fall through */
case SOCK_WAKE_IO:
call_kill:
- __kill_fasync(sock->fasync_list, SIGIO, band);
+ kill_fasync(&sock->fasync_list, SIGIO, band);
break;
case SOCK_WAKE_URG:
- __kill_fasync(sock->fasync_list, SIGURG, band);
+ kill_fasync(&sock->fasync_list, SIGURG, band);
}
return 0;
}