net: sk_dst_cache RCUification
With the latest CONFIG_PROVE_RCU stuff, I felt more comfortable making this
change.
sk->sk_dst_cache is currently protected by a rwlock (sk_dst_lock).
This rwlock is read-locked for a very small amount of time, and dst
entries are already freed after an RCU grace period. This calls for RCU
again :)
This patch converts sk_dst_lock to a spinlock, and uses RCU for readers.
__sk_dst_get() is supposed to be called with rcu_read_lock() held or with
the socket locked by the user, so use the appropriate
rcu_dereference_check() condition
(rcu_read_lock_held() || sock_owned_by_user(sk)).
This patch avoids two atomic ops per tx packet on UDP connected sockets,
for example, and permits sk_dst_lock to be much less dirtied.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/net/core/dev.c b/net/core/dev.c
index 0eb79e3..ca4cdef 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2015,7 +2015,7 @@
if (dev->real_num_tx_queues > 1)
queue_index = skb_tx_hash(dev, skb);
- if (sk && sk->sk_dst_cache)
+ if (sk && rcu_dereference_check(sk->sk_dst_cache, 1))
sk_tx_queue_set(sk, queue_index);
}
}
diff --git a/net/core/sock.c b/net/core/sock.c
index c5812bb..7effa1e 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -364,11 +364,11 @@
struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie)
{
- struct dst_entry *dst = sk->sk_dst_cache;
+ struct dst_entry *dst = __sk_dst_get(sk);
if (dst && dst->obsolete && dst->ops->check(dst, cookie) == NULL) {
sk_tx_queue_clear(sk);
- sk->sk_dst_cache = NULL;
+ rcu_assign_pointer(sk->sk_dst_cache, NULL);
dst_release(dst);
return NULL;
}
@@ -1157,7 +1157,7 @@
skb_queue_head_init(&newsk->sk_async_wait_queue);
#endif
- rwlock_init(&newsk->sk_dst_lock);
+ spin_lock_init(&newsk->sk_dst_lock);
rwlock_init(&newsk->sk_callback_lock);
lockdep_set_class_and_name(&newsk->sk_callback_lock,
af_callback_keys + newsk->sk_family,
@@ -1898,7 +1898,7 @@
} else
sk->sk_sleep = NULL;
- rwlock_init(&sk->sk_dst_lock);
+ spin_lock_init(&sk->sk_dst_lock);
rwlock_init(&sk->sk_callback_lock);
lockdep_set_class_and_name(&sk->sk_callback_lock,
af_callback_keys + sk->sk_family,