cxgb4: Add low latency socket busy_poll support
cxgb_busy_poll, corresponding to ndo_busy_poll, gets called by the socket
waiting for data.
With busy_poll enabled, improvement is seen in latency numbers as observed by
collecting netperf TCP_RR numbers.
Below are latency number, with and without busy-poll, in a switched environment
for a particular msg size:
netperf command: netperf -4 -H <ip> -l 30 -t TCP_RR -- -r1,1
Latency without busy-poll: ~16.25 us
Latency with busy-poll : ~08.79 us
Based on original work by Kumar Sanghvi <kumaras@chelsio.com>
Signed-off-by: Hariprasad Shenai <hariprasad@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 5bf490a..041742b 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -923,8 +923,14 @@
for (i = 0; i < ARRAY_SIZE(adap->sge.ingr_map); i++) {
struct sge_rspq *q = adap->sge.ingr_map[i];
- if (q && q->handler)
+ if (q && q->handler) {
napi_disable(&q->napi);
+ local_bh_disable();
+ while (!cxgb_poll_lock_napi(q))
+ mdelay(1);
+ local_bh_enable();
+ }
+
}
}
@@ -940,8 +946,10 @@
if (!q)
continue;
- if (q->handler)
+ if (q->handler) {
+ cxgb_busy_poll_init_lock(q);
napi_enable(&q->napi);
+ }
/* 0-increment GTS to start the timer and enable interrupts */
t4_write_reg(adap, MYPF_REG(SGE_PF_GTS_A),
SEINTARM_V(q->intr_params) |
@@ -4563,6 +4571,10 @@
#ifdef CONFIG_NET_POLL_CONTROLLER
.ndo_poll_controller = cxgb_netpoll,
#endif
+#ifdef CONFIG_NET_RX_BUSY_POLL
+ .ndo_busy_poll = cxgb_busy_poll,
+#endif
+
};
void t4_fatal_err(struct adapter *adap)