Merge branch 'tun_rfs'
Zhi Yong Wu says:
====================
tun: add the RFS support
Since Tom Herbert's hash related patchset was modified and got merged,
his pachset about adding support for RFS on tun flows also need to get
adjusted accordingly. I tried to update them, and before i will start
to do some perf tests, i hope to get one correct code base, so it's time
to post them out now. Any constructive comments are welcome, thanks.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index a17a701..3cf0457 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -152,6 +152,7 @@
struct tun_struct *tun;
u32 rxhash;
+ u32 rps_rxhash;
int queue_index;
unsigned long updated;
};
@@ -220,6 +221,7 @@
rxhash, queue_index);
e->updated = jiffies;
e->rxhash = rxhash;
+ e->rps_rxhash = 0;
e->queue_index = queue_index;
e->tun = tun;
hlist_add_head_rcu(&e->hash_link, head);
@@ -232,6 +234,7 @@
{
tun_debug(KERN_INFO, tun, "delete flow: hash %u index %u\n",
e->rxhash, e->queue_index);
+ sock_rps_reset_flow_hash(e->rps_rxhash);
hlist_del_rcu(&e->hash_link);
kfree_rcu(e, rcu);
--tun->flow_count;
@@ -325,6 +328,7 @@
/* TODO: keep queueing to old queue until it's empty? */
e->queue_index = queue_index;
e->updated = jiffies;
+ sock_rps_record_flow_hash(e->rps_rxhash);
} else {
spin_lock_bh(&tun->lock);
if (!tun_flow_find(head, rxhash) &&
@@ -341,6 +345,18 @@
rcu_read_unlock();
}
+/**
+ * Save the hash received in the stack receive path and update the
+ * flow_hash table accordingly.
+ */
+static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash)
+{
+ if (unlikely(e->rps_rxhash != hash)) {
+ sock_rps_reset_flow_hash(e->rps_rxhash);
+ e->rps_rxhash = hash;
+ }
+}
+
/* We try to identify a flow through its rxhash first. The reason that
* we do not check rxq no. is because some cards(e.g 82599), chooses
* the rxq based on the txq where the last packet of the flow comes. As
@@ -361,9 +377,10 @@
txq = skb_get_hash(skb);
if (txq) {
e = tun_flow_find(&tun->flows[tun_hashfn(txq)], txq);
- if (e)
+ if (e) {
txq = e->queue_index;
- else
+ tun_flow_save_rps_rxhash(e, txq);
+ } else
/* use multiply and shift instead of expensive divide */
txq = ((u64)txq * numqueues) >> 32;
} else if (likely(skb_rx_queue_recorded(skb))) {
@@ -728,6 +745,22 @@
if (txq >= tun->numqueues)
goto drop;
+ if (tun->numqueues == 1) {
+ /* Select queue was not called for the skbuff, so we extract the
+ * RPS hash and save it into the flow_table here.
+ */
+ __u32 rxhash;
+
+ rxhash = skb_get_hash(skb);
+ if (rxhash) {
+ struct tun_flow_entry *e;
+ e = tun_flow_find(&tun->flows[tun_hashfn(rxhash)],
+ rxhash);
+ if (e)
+ tun_flow_save_rps_rxhash(e, rxhash);
+ }
+ }
+
tun_debug(KERN_INFO, tun, "tun_net_xmit %d\n", skb->len);
BUG_ON(!tfile);
diff --git a/include/net/sock.h b/include/net/sock.h
index 2ef3c3e..8ee90ad 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -820,30 +820,40 @@
return sk->sk_backlog_rcv(sk, skb);
}
-static inline void sock_rps_record_flow(const struct sock *sk)
+static inline void sock_rps_record_flow_hash(__u32 hash)
{
#ifdef CONFIG_RPS
struct rps_sock_flow_table *sock_flow_table;
rcu_read_lock();
sock_flow_table = rcu_dereference(rps_sock_flow_table);
- rps_record_sock_flow(sock_flow_table, sk->sk_rxhash);
+ rps_record_sock_flow(sock_flow_table, hash);
rcu_read_unlock();
#endif
}
-static inline void sock_rps_reset_flow(const struct sock *sk)
+static inline void sock_rps_reset_flow_hash(__u32 hash)
{
#ifdef CONFIG_RPS
struct rps_sock_flow_table *sock_flow_table;
rcu_read_lock();
sock_flow_table = rcu_dereference(rps_sock_flow_table);
- rps_reset_sock_flow(sock_flow_table, sk->sk_rxhash);
+ rps_reset_sock_flow(sock_flow_table, hash);
rcu_read_unlock();
#endif
}
+static inline void sock_rps_record_flow(const struct sock *sk)
+{
+ sock_rps_record_flow_hash(sk->sk_rxhash);
+}
+
+static inline void sock_rps_reset_flow(const struct sock *sk)
+{
+ sock_rps_reset_flow_hash(sk->sk_rxhash);
+}
+
static inline void sock_rps_save_rxhash(struct sock *sk,
const struct sk_buff *skb)
{