qcacmn: Add fastpath Rx support

With dedicated CEs for Rx and Tx completion HTT messages, skip
processing in the Host Target Communication (HTC) layer. Do special
handling in the HIF-CE and HTT layers; this optimization results in a
3-4% CPU utilization gain.
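
For reference, the Rx fastpath callback invoked through
ce_state->fastpath_handler below has roughly this shape (a sketch only:
the handler name and body are illustrative and not part of this change;
only the signature and return contract are implied by the call site in
ce_fastpath_rx_handle()):

    /*
     * Returns the number of HTT Tx completions carried in the
     * num_cmpls messages so that the CE layer can reap the same
     * number of entries off the Tx (HTT H2T) CE source ring.
     */
    uint32_t fastpath_msg_handler(void *context, qdf_nbuf_t *cmpl_msdus,
                                  uint32_t num_cmpls)
    {
            uint32_t num_tx_cmpls = 0;

            /* parse cmpl_msdus[0 .. num_cmpls - 1] here, counting
             * any Tx completion indications found */
            return num_tx_cmpls;
    }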
Change-Id: I400148a0e24ac62dd09e2a95d5f35d94d83fe2df
CRs-Fixed: 987182
diff --git a/hif/src/ce/ce_service.c b/hif/src/ce/ce_service.c
index 2f1c0a4..cb878fd 100644
--- a/hif/src/ce/ce_service.c
+++ b/hif/src/ce/ce_service.c
@@ -36,6 +36,9 @@
#include "epping_main.h"
#include "hif_main.h"
#include "hif_debug.h"
+#include "ol_txrx_types.h"
+#include <cds_api.h>
+#include <osdep.h>
#ifdef IPA_OFFLOAD
#ifdef QCA_WIFI_3_0
@@ -525,7 +528,6 @@
u_int32_t ctrl_addr = ce_state->ctrl_addr;
unsigned int nentries_mask = src_ring->nentries_mask;
unsigned int write_index;
- unsigned int sw_index;
unsigned int frag_len;
qdf_nbuf_t msdu;
int i;
@@ -533,7 +535,6 @@
uint32_t user_flags = 0;
qdf_spin_lock_bh(&ce_state->ce_index_lock);
- sw_index = src_ring->sw_index;
write_index = src_ring->write_index;
/* 2 msdus per packet */
@@ -673,10 +674,12 @@
return -EIO;
}
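+ /*
+ * For fastpath-enabled HTT Rx data CEs, also allow this enqueue when
+ * the ring is completely full (delta == 0): fastpath reuses the Rx
+ * buffers in place instead of reposting them.
+ */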
- if (CE_RING_DELTA(nentries_mask, write_index, sw_index - 1) > 0) {
+ if ((CE_RING_DELTA(nentries_mask, write_index, sw_index - 1) > 0) ||
+ (ce_is_fastpath_enabled((struct hif_opaque_softc *)scn) &&
+ CE_state->htt_rx_data &&
+ (CE_RING_DELTA(nentries_mask, write_index, sw_index - 1) == 0))) {
struct CE_dest_desc *dest_ring_base =
- (struct CE_dest_desc *)dest_ring->
- base_addr_owner_space;
+ (struct CE_dest_desc *)dest_ring->base_addr_owner_space;
struct CE_dest_desc *dest_desc =
CE_DEST_RING_TO_DESC(dest_ring_base, write_index);
@@ -697,12 +700,14 @@
/* Update Destination Ring Write Index */
write_index = CE_RING_IDX_INCR(nentries_mask, write_index);
- CE_DEST_RING_WRITE_IDX_SET(scn, ctrl_addr, write_index);
- dest_ring->write_index = write_index;
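+ /*
+ * In the fastpath ring-full case the incremented write_index wraps
+ * around to sw_index; skip the h/w write-index update then, so the
+ * full ring is not mistaken for an empty one.
+ */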
+ if (write_index != sw_index) {
+ CE_DEST_RING_WRITE_IDX_SET(scn, ctrl_addr, write_index);
+ dest_ring->write_index = write_index;
+ }
status = QDF_STATUS_SUCCESS;
} else {
status = QDF_STATUS_E_FAILURE;
}
Q_TARGET_ACCESS_END(scn);
qdf_spin_unlock_bh(&CE_state->ce_index_lock);
return status;
@@ -1267,6 +1272,211 @@
#endif /*ATH_11AC_TXCOMPACT */
+#ifdef WLAN_FEATURE_FASTPATH
+
+/**
+ * ce_tx_completion() - reap the CE source ring when a Tx completion arrives
+ * @ce_state: Handle to CE
+ * @num_tx_cmpls: Number of completions handled
+ *
+ * Reap the CE source ring when a CE completion happens: advance the
+ * source ring sw_index by the number of completions handled.
+ *
+ * Return: None
+ */
+static void
+ce_tx_completion(struct CE_state *ce_state, uint32_t num_tx_cmpls)
+{
+ struct CE_ring_state *src_ring = ce_state->src_ring;
+ uint32_t nentries_mask = src_ring->nentries_mask;
+
+ ASSERT(num_tx_cmpls);
+
+ /*
+ * This lock synchronizes the index manipulation of this CE with
+ * the index manipulation done in ce_send_fast().
+ */
+ qdf_spin_lock(&ce_state->ce_index_lock);
+
+ /*
+ * Advance the s/w index:
+ * This effectively simulates completing the CE ring descriptors
+ */
+ src_ring->sw_index = CE_RING_IDX_ADD(nentries_mask, src_ring->sw_index,
+ num_tx_cmpls);
+ qdf_spin_unlock(&ce_state->ce_index_lock);
+}
+
+/**
+ * ce_fastpath_rx_handle() - Updates write_index and calls fastpath msg handler
+ * @ce_state: handle to copy engine state
+ * @cmpl_msdus: Rx msdus
+ * @num_cmpls: number of Rx msdus
+ * @ctrl_addr: CE control address
+ *
+ * Return: None
+ */
+static void ce_fastpath_rx_handle(struct CE_state *ce_state,
+ qdf_nbuf_t *cmpl_msdus, uint32_t num_cmpls,
+ uint32_t ctrl_addr)
+{
+ struct hif_softc *scn = ce_state->scn;
+ struct CE_ring_state *dest_ring = ce_state->dest_ring;
+ struct CE_state *ce_tx_cmpl_state = scn->ce_id_to_state[CE_HTT_H2T_MSG];
+ uint32_t nentries_mask = dest_ring->nentries_mask;
+ uint32_t tx_cmpls;
+ uint32_t write_index;
+
+ tx_cmpls = (ce_state->fastpath_handler)(ce_state->context, cmpl_msdus,
+ num_cmpls);
+
+ /* Update Destination Ring Write Index */
+ write_index = dest_ring->write_index;
+ write_index = CE_RING_IDX_ADD(nentries_mask, write_index, num_cmpls);
+ CE_DEST_RING_WRITE_IDX_SET(scn, ctrl_addr, write_index);
+ dest_ring->write_index = write_index;
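+
+ /*
+ * The handler returned the number of HTT Tx completions it
+ * processed; reap that many entries off the HTT H2T (Tx) CE
+ * source ring.
+ */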
+ ce_tx_completion(ce_tx_cmpl_state, tx_cmpls);
+}
+
+#define MSG_FLUSH_NUM 20
+/**
+ * ce_per_engine_service_fast() - CE handler routine to service fastpath
+ *	messages
+ * @scn: HIF context
+ * @ce_id: copy engine ID
+ *
+ * Function:
+ * 1) Go through the CE ring and find valid completions
+ * 2) For each valid completion, retrieve the context (nbuf) from
+ *    per_transfer_context[]
+ * 3) Unmap the buffer and accumulate it in an array
+ * 4) Call the message handler when the array is full or when exiting
+ *    the handler
+ *
+ * Return: QDF_STATUS
+ */
+static int
+ce_per_engine_service_fast(struct hif_softc *scn, int ce_id)
+{
+ struct CE_state *ce_state = scn->ce_id_to_state[ce_id];
+ struct CE_ring_state *dest_ring = ce_state->dest_ring;
+ struct CE_dest_desc *dest_ring_base =
+ (struct CE_dest_desc *)dest_ring->base_addr_owner_space;
+
+ uint32_t nentries_mask = dest_ring->nentries_mask;
+ uint32_t sw_index = dest_ring->sw_index;
+ uint32_t nbytes;
+ qdf_nbuf_t nbuf;
+ uint32_t paddr_lo;
+ struct CE_dest_desc *dest_desc;
+ uint32_t ce_int_status = (1 << ce_id);
+ qdf_nbuf_t cmpl_msdus[MSG_FLUSH_NUM];
+ uint32_t ctrl_addr = ce_state->ctrl_addr;
+ uint32_t nbuf_cmpl_idx = 0;
+
+more_data:
+ if (ce_int_status == (1 << ce_id)) {
+ for (;;) {
+
+ dest_desc = CE_DEST_RING_TO_DESC(dest_ring_base,
+ sw_index);
+
+ /*
+ * The following read of nbytes is from non-cached memory
+ */
+ nbytes = dest_desc->nbytes;
+
+ /* If completion is invalid, break */
+ if (qdf_unlikely(nbytes == 0))
+ break;
+
+ /*
+ * Build the nbuf list from valid completions
+ */
+ nbuf = dest_ring->per_transfer_context[sw_index];
+
+ /*
+ * No lock is needed here, since this is the only thread
+ * that accesses the sw_index
+ */
+ sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index);
+
+ /*
+ * CAREFUL : Uncached write, but still less expensive,
+ * since most modern caches use "write-combining" to
+ * flush multiple cache-writes all at once.
+ */
+ dest_desc->nbytes = 0;
+
+ /*
+ * Per our understanding this is not required on our platform,
+ * since we are doing the same cache invalidation operation on
+ * the same buffer twice in succession, without any modification
+ * to this buffer by the CPU in between. However, this code with
+ * two syncs in succession has been undergoing testing at a
+ * customer site and has shown no problems so far. We would like
+ * to confirm with the customer that this sync is really not
+ * required before removing it completely.
+ */
+ paddr_lo = QDF_NBUF_CB_PADDR(nbuf);
+
+ OS_SYNC_SINGLE_FOR_CPU(scn->qdf_dev->dev, paddr_lo,
+ (skb_end_pointer(nbuf) - (nbuf)->data),
+ DMA_FROM_DEVICE);
+ qdf_nbuf_put_tail(nbuf, nbytes);
+
+ qdf_assert_always(nbuf->data != NULL);
+
+ cmpl_msdus[nbuf_cmpl_idx++] = nbuf;
+
+ /*
+ * We are not reposting these buffers; they are reused in
+ * place by the fastpath handler.
+ */
+ if (nbuf_cmpl_idx == MSG_FLUSH_NUM) {
+ qdf_spin_unlock(&ce_state->ce_index_lock);
+ ce_fastpath_rx_handle(ce_state, cmpl_msdus,
+ MSG_FLUSH_NUM, ctrl_addr);
+ qdf_spin_lock(&ce_state->ce_index_lock);
+ nbuf_cmpl_idx = 0;
+ }
+
+ }
+
+ /*
+ * If there are not enough completions to fill the array,
+ * just call the message handler here
+ */
+ if (nbuf_cmpl_idx) {
+ qdf_spin_unlock(&ce_state->ce_index_lock);
+ ce_fastpath_rx_handle(ce_state, cmpl_msdus,
+ nbuf_cmpl_idx, ctrl_addr);
+ qdf_spin_lock(&ce_state->ce_index_lock);
+ nbuf_cmpl_idx = 0;
+ }
+ qdf_atomic_set(&ce_state->rx_pending, 0);
+ dest_ring->sw_index = sw_index;
+
+ CE_ENGINE_INT_STATUS_CLEAR(scn, ctrl_addr,
+ HOST_IS_COPY_COMPLETE_MASK);
+ }
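+
+ /*
+ * Re-read the interrupt status: if the watermark bit is set, more
+ * completions arrived while the ring was being drained, so loop
+ * back for more.
+ */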
+ ce_int_status = CE_ENGINE_INT_STATUS_GET(scn, ctrl_addr);
+ if (ce_int_status & CE_WATERMARK_MASK)
+ goto more_data;
+
+ return QDF_STATUS_SUCCESS;
+}
+
+#else
+static int
+ce_per_engine_service_fast(struct hif_softc *scn, int ce_id)
+{
+ return QDF_STATUS_E_FAILURE;
+}
+#endif /* WLAN_FEATURE_FASTPATH */
+
/*
* Number of times to check for any pending tx/rx completion on
* a copy engine, this count should be big enough. Once we hit
@@ -1310,6 +1520,17 @@
qdf_spin_lock(&CE_state->ce_index_lock);
+ /*
+ * The check below ensures that the CE being serviced is a
+ * datapath Rx CE with a fastpath handler registered, and
+ * services it via the fastpath routine if so.
+ */
+ if (ce_is_fastpath_handler_registered(CE_state) &&
+     !ce_per_engine_service_fast(scn, CE_id)) {
+ qdf_spin_unlock(&CE_state->ce_index_lock);
+ return 0;
+ }
+
/* Clear force_break flag and re-initialize receive_count to 0 */
/* NAPI: scn variables- thread/multi-processing safety? */