V4L/DVB (11618): cx18: Convert per stream mutex locks to per queue spin locks

To avoid sleeping while providing buffers to user space and while handling
incoming buffers from the capture unit, convert the per-stream mutex that
locked the queues into 3 spin locks.  There is now one spin lock per queue,
which increases concurrency when moving buffers around.

Also simplify the queue manipulations and the handling of incoming data
buffers from the capture unit.

Signed-off-by: Andy Walls <awalls@radix.net>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
diff --git a/drivers/media/video/cx18/cx18-driver.h b/drivers/media/video/cx18/cx18-driver.h
index 62dca43..35a6758 100644
--- a/drivers/media/video/cx18/cx18-driver.h
+++ b/drivers/media/video/cx18/cx18-driver.h
@@ -286,6 +286,7 @@
 	struct list_head list;
 	atomic_t buffers;
 	u32 bytesused;
+	spinlock_t lock;
 };
 
 struct cx18_dvb {
@@ -365,7 +366,6 @@
 	unsigned mdl_offset;
 
 	u32 id;
-	struct mutex qlock; 	/* locks access to the queues */
 	unsigned long s_flags;	/* status flags, see above */
 	int dma;		/* can be PCI_DMA_TODEVICE,
 				   PCI_DMA_FROMDEVICE or
diff --git a/drivers/media/video/cx18/cx18-mailbox.c b/drivers/media/video/cx18/cx18-mailbox.c
index df7d61d..afe46c3 100644
--- a/drivers/media/video/cx18/cx18-mailbox.c
+++ b/drivers/media/video/cx18/cx18-mailbox.c
@@ -191,23 +191,24 @@
 		if (buf == NULL) {
 			CX18_WARN("Could not find buf %d for stream %s\n",
 				  id, s->name);
-			/* Put as many buffers as possible back into fw use */
-			cx18_stream_load_fw_queue(s);
 			continue;
 		}
 
-		if (s->type == CX18_ENC_STREAM_TYPE_TS && s->dvb.enabled) {
-			CX18_DEBUG_HI_DMA("TS recv bytesused = %d\n",
-					  buf->bytesused);
-			dvb_dmx_swfilter(&s->dvb.demux, buf->buf,
-					 buf->bytesused);
+		CX18_DEBUG_HI_DMA("%s recv bytesused = %d\n",
+				  s->name, buf->bytesused);
+
+		if (s->type != CX18_ENC_STREAM_TYPE_TS)
+			cx18_enqueue(s, buf, &s->q_full);
+		else {
+			if (s->dvb.enabled)
+				dvb_dmx_swfilter(&s->dvb.demux, buf->buf,
+						 buf->bytesused);
+			cx18_enqueue(s, buf, &s->q_free);
 		}
-		/* Put as many buffers as possible back into fw use */
-		cx18_stream_load_fw_queue(s);
-		/* Put back TS buffer, since it was removed from all queues */
-		if (s->type == CX18_ENC_STREAM_TYPE_TS)
-			cx18_stream_put_buf_fw(s, buf);
 	}
+	/* Put as many buffers as possible back into fw use */
+	cx18_stream_load_fw_queue(s);
+
 	wake_up(&cx->dma_waitq);
 	if (s->id != -1)
 		wake_up(&s->waitq);
diff --git a/drivers/media/video/cx18/cx18-queue.c b/drivers/media/video/cx18/cx18-queue.c
index 3046b8e..693a745 100644
--- a/drivers/media/video/cx18/cx18-queue.c
+++ b/drivers/media/video/cx18/cx18-queue.c
@@ -53,13 +53,13 @@
 		buf->skipped = 0;
 	}
 
-	mutex_lock(&s->qlock);
-
 	/* q_busy is restricted to a max buffer count imposed by firmware */
 	if (q == &s->q_busy &&
 	    atomic_read(&q->buffers) >= CX18_MAX_FW_MDLS_PER_STREAM)
 		q = &s->q_free;
 
+	spin_lock(&q->lock);
+
 	if (to_front)
 		list_add(&buf->list, &q->list); /* LIFO */
 	else
@@ -67,7 +67,7 @@
 	q->bytesused += buf->bytesused - buf->readpos;
 	atomic_inc(&q->buffers);
 
-	mutex_unlock(&s->qlock);
+	spin_unlock(&q->lock);
 	return q;
 }
 
@@ -75,7 +75,7 @@
 {
 	struct cx18_buffer *buf = NULL;
 
-	mutex_lock(&s->qlock);
+	spin_lock(&q->lock);
 	if (!list_empty(&q->list)) {
 		buf = list_first_entry(&q->list, struct cx18_buffer, list);
 		list_del_init(&buf->list);
@@ -83,7 +83,7 @@
 		buf->skipped = 0;
 		atomic_dec(&q->buffers);
 	}
-	mutex_unlock(&s->qlock);
+	spin_unlock(&q->lock);
 	return buf;
 }
 
@@ -94,9 +94,23 @@
 	struct cx18_buffer *buf;
 	struct cx18_buffer *tmp;
 	struct cx18_buffer *ret = NULL;
+	LIST_HEAD(sweep_up);
 
-	mutex_lock(&s->qlock);
+	/*
+	 * We don't have to acquire multiple q locks here, because we are
+	 * serialized by the single threaded work handler.
+	 * Buffers from the firmware will thus remain in order as
+	 * they are moved from q_busy to q_full or to the dvb ring buffer.
+	 */
+	spin_lock(&s->q_busy.lock);
 	list_for_each_entry_safe(buf, tmp, &s->q_busy.list, list) {
+		/*
+		 * We should find what the firmware told us is done,
+		 * right at the front of the queue.  If we don't, we likely have
+		 * missed a buffer done message from the firmware.
+		 * Once we skip a buffer repeatedly, relative to the size of
+		 * q_busy, we have high confidence we've missed it.
+		 */
 		if (buf->id != id) {
 			buf->skipped++;
 			if (buf->skipped >= atomic_read(&s->q_busy.buffers)-1) {
@@ -105,38 +119,41 @@
 					  "times - it must have dropped out of "
 					  "rotation\n", s->name, buf->id,
 					  buf->skipped);
-				/* move it to q_free */
-				list_move_tail(&buf->list, &s->q_free.list);
-				buf->bytesused = buf->readpos = buf->b_flags =
-					buf->skipped = 0;
+				/* Sweep it up to put it back into rotation */
+				list_move_tail(&buf->list, &sweep_up);
 				atomic_dec(&s->q_busy.buffers);
-				atomic_inc(&s->q_free.buffers);
 			}
 			continue;
 		}
-
-		buf->bytesused = bytesused;
-		/* Sync the buffer before we release the qlock */
-		cx18_buf_sync_for_cpu(s, buf);
-		if (s->type == CX18_ENC_STREAM_TYPE_TS) {
-			/*
-			 * TS doesn't use q_full.  As we pull the buffer off of
-			 * the queue here, the caller will have to put it back.
-			 */
-			list_del_init(&buf->list);
-		} else {
-			/* Move buffer from q_busy to q_full */
-			list_move_tail(&buf->list, &s->q_full.list);
-			set_bit(CX18_F_B_NEED_BUF_SWAP, &buf->b_flags);
-			s->q_full.bytesused += buf->bytesused;
-			atomic_inc(&s->q_full.buffers);
-		}
+		/*
+		 * We pull the desired buffer off of the queue here.  Something
+		 * will have to put it back on a queue later.
+		 */
+		list_del_init(&buf->list);
 		atomic_dec(&s->q_busy.buffers);
-
 		ret = buf;
 		break;
 	}
-	mutex_unlock(&s->qlock);
+	spin_unlock(&s->q_busy.lock);
+
+	/*
+	 * We found the buffer for which we were looking.  Get it ready for
+	 * the caller to put on q_full or in the dvb ring buffer.
+	 */
+	if (ret != NULL) {
+		ret->bytesused = bytesused;
+		ret->skipped = 0;
+		/* readpos and b_flags were 0'ed when the buf went on q_busy */
+		cx18_buf_sync_for_cpu(s, ret);
+		if (s->type != CX18_ENC_STREAM_TYPE_TS)
+			set_bit(CX18_F_B_NEED_BUF_SWAP, &ret->b_flags);
+	}
+
+	/* Put any buffers the firmware is ignoring back into normal rotation */
+	list_for_each_entry_safe(buf, tmp, &sweep_up, list) {
+		list_del_init(&buf->list);
+		cx18_enqueue(s, buf, &s->q_free);
+	}
 	return ret;
 }
 
@@ -148,7 +165,7 @@
 	if (q == &s->q_free)
 		return;
 
-	mutex_lock(&s->qlock);
+	spin_lock(&q->lock);
 	while (!list_empty(&q->list)) {
 		buf = list_first_entry(&q->list, struct cx18_buffer, list);
 		list_move_tail(&buf->list, &s->q_free.list);
@@ -156,7 +173,7 @@
 		atomic_inc(&s->q_free.buffers);
 	}
 	cx18_queue_init(q);
-	mutex_unlock(&s->qlock);
+	spin_unlock(&q->lock);
 }
 
 void cx18_flush_queues(struct cx18_stream *s)
diff --git a/drivers/media/video/cx18/cx18-streams.c b/drivers/media/video/cx18/cx18-streams.c
index bbeb01c..e1934e9 100644
--- a/drivers/media/video/cx18/cx18-streams.c
+++ b/drivers/media/video/cx18/cx18-streams.c
@@ -116,11 +116,13 @@
 	s->buffers = cx->stream_buffers[type];
 	s->buf_size = cx->stream_buf_size[type];
 
-	mutex_init(&s->qlock);
 	init_waitqueue_head(&s->waitq);
 	s->id = -1;
+	spin_lock_init(&s->q_free.lock);
 	cx18_queue_init(&s->q_free);
+	spin_lock_init(&s->q_busy.lock);
 	cx18_queue_init(&s->q_busy);
+	spin_lock_init(&s->q_full.lock);
 	cx18_queue_init(&s->q_full);
 }
 
@@ -685,13 +687,13 @@
 
 	/* Init all the cpu_mdls for this stream */
 	cx18_flush_queues(s);
-	mutex_lock(&s->qlock);
+	spin_lock(&s->q_free.lock);
 	list_for_each_entry(buf, &s->q_free.list, list) {
 		cx18_writel(cx, buf->dma_handle,
 					&cx->scb->cpu_mdl[buf->id].paddr);
 		cx18_writel(cx, s->buf_size, &cx->scb->cpu_mdl[buf->id].length);
 	}
-	mutex_unlock(&s->qlock);
+	spin_unlock(&s->q_free.lock);
 	_cx18_stream_load_fw_queue(s);
 
 	/* begin_capture */