V4L/DVB (11616): cx18: Add a work queue for deferring empty buffer handoffs to the firmware

This change defers sending all CX18_CPU_DE_SET_MDL commands, for a stream with
an ongoing capture, by adding a work queue to handle sending such commands when
needed.  This prevents any sleeps, caused by notifying the firmware of new
usable buffers, when a V4L2 application read() is being satisfied or when
an incoming buffer is processed by the cx18-NN-in work queue thread.

Signed-off-by: Andy Walls <awalls@radix.net>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
diff --git a/drivers/media/video/cx18/cx18-driver.c b/drivers/media/video/cx18/cx18-driver.c
index 7975020..658cfbb 100644
--- a/drivers/media/video/cx18/cx18-driver.c
+++ b/drivers/media/video/cx18/cx18-driver.c
@@ -546,6 +546,47 @@
 	cx->card_i2c = cx->card->i2c;
 }
 
+/* Create the single-threaded "<name>-in" workqueue: returns 0 or -ENOMEM */
+static int __devinit cx18_create_in_workq(struct cx18 *cx)
+{
+	snprintf(cx->in_workq_name, sizeof(cx->in_workq_name), "%s-in",
+		 cx->v4l2_dev.name);
+	cx->in_work_queue = create_singlethread_workqueue(cx->in_workq_name);
+	if (cx->in_work_queue != NULL)
+		return 0;
+	CX18_ERR("Unable to create incoming mailbox handler thread\n");
+	return -ENOMEM;
+}
+
+/* Create the "<name>-out" outgoing mailbox workqueue: returns 0 or -ENOMEM */
+static int __devinit cx18_create_out_workq(struct cx18 *cx)
+{
+	snprintf(cx->out_workq_name, sizeof(cx->out_workq_name), "%s-out",
+		 cx->v4l2_dev.name);
+	cx->out_work_queue = create_workqueue(cx->out_workq_name);
+	if (cx->out_work_queue != NULL)
+		return 0;
+	CX18_ERR("Unable to create outgoing mailbox handler threads\n");
+	return -ENOMEM;
+}
+
+static void __devinit cx18_init_in_work_orders(struct cx18 *cx)
+{
+	struct cx18_in_work_order *order = &cx->in_work_order[0];
+	for (; order < &cx->in_work_order[CX18_MAX_IN_WORK_ORDERS]; order++) {
+		order->cx = cx;
+		order->str = cx->epu_debug_str; /* one string shared by all */
+		INIT_WORK(&order->work, cx18_in_work_handler);
+	}
+}
+
+static void __devinit cx18_init_out_work_orders(struct cx18 *cx)
+{
+	struct cx18_out_work_order *order = &cx->out_work_order[0];
+	for (; order < &cx->out_work_order[CX18_MAX_OUT_WORK_ORDERS]; order++)
+		INIT_WORK(&order->work, cx18_out_work_handler);
+}
+
 /* Precondition: the cx18 structure has been memset to 0. Only
    the dev and instance fields have been filled in.
    No assumptions on the card type may be made here (see cx18_init_struct2
@@ -553,7 +594,7 @@
  */
 static int __devinit cx18_init_struct1(struct cx18 *cx)
 {
-	int i;
+	int ret;
 
 	cx->base_addr = pci_resource_start(cx->pci_dev, 0);
 
@@ -562,19 +603,18 @@
 	mutex_init(&cx->epu2apu_mb_lock);
 	mutex_init(&cx->epu2cpu_mb_lock);
 
-	snprintf(cx->in_workq_name, sizeof(cx->in_workq_name), "%s-in",
-		 cx->v4l2_dev.name);
-	cx->in_work_queue = create_singlethread_workqueue(cx->in_workq_name);
-	if (cx->in_work_queue == NULL) {
-		CX18_ERR("Unable to create incoming mailbox handler thread\n");
-		return -ENOMEM;
+	ret = cx18_create_out_workq(cx);
+	if (ret)
+		return ret;
+
+	ret = cx18_create_in_workq(cx);
+	if (ret) {
+		destroy_workqueue(cx->out_work_queue);
+		return ret;
 	}
 
-	for (i = 0; i < CX18_MAX_IN_WORK_ORDERS; i++) {
-		cx->in_work_order[i].cx = cx;
-		cx->in_work_order[i].str = cx->epu_debug_str;
-		INIT_WORK(&cx->in_work_order[i].work, cx18_in_work_handler);
-	}
+	cx18_init_out_work_orders(cx);
+	cx18_init_in_work_orders(cx);
 
 	/* start counting open_id at 1 */
 	cx->open_id = 1;
@@ -761,17 +801,17 @@
 		retval = -ENODEV;
 		goto err;
 	}
-	if (cx18_init_struct1(cx)) {
-		retval = -ENOMEM;
+
+	retval = cx18_init_struct1(cx);
+	if (retval)
 		goto err;
-	}
 
 	CX18_DEBUG_INFO("base addr: 0x%08x\n", cx->base_addr);
 
 	/* PCI Device Setup */
 	retval = cx18_setup_pci(cx, pci_dev, pci_id);
 	if (retval != 0)
-		goto free_workqueue;
+		goto free_workqueues;
 
 	/* map io memory */
 	CX18_DEBUG_INFO("attempting ioremap at 0x%08x len 0x%08x\n",
@@ -945,8 +985,9 @@
 	cx18_iounmap(cx);
 free_mem:
 	release_mem_region(cx->base_addr, CX18_MEM_SIZE);
-free_workqueue:
+free_workqueues:
 	destroy_workqueue(cx->in_work_queue);
+	destroy_workqueue(cx->out_work_queue);
 err:
 	if (retval == 0)
 		retval = -ENODEV;
@@ -1075,15 +1116,26 @@
 	if (atomic_read(&cx->tot_capturing) > 0)
 		cx18_stop_all_captures(cx);
 
-	/* Interrupts */
+	/* Stop interrupts that cause incoming work to be queued */
 	cx18_sw1_irq_disable(cx, IRQ_CPU_TO_EPU | IRQ_APU_TO_EPU);
+
+	/* Incoming work can cause outgoing work, so clean up incoming first */
+	cx18_cancel_in_work_orders(cx);
+
+	/*
+	 * An outgoing work order can have the only pointer to a dynamically
+	 * allocated buffer, so we need to flush outgoing work and not just
+	 * cancel it, so we don't lose the pointer and leak memory.
+	 */
+	flush_workqueue(cx->out_work_queue);
+
+	/* Stop ack interrupts that may have been needed for work to finish */
 	cx18_sw2_irq_disable(cx, IRQ_CPU_TO_EPU_ACK | IRQ_APU_TO_EPU_ACK);
 
 	cx18_halt_firmware(cx);
 
-	cx18_cancel_in_work_orders(cx);
-
 	destroy_workqueue(cx->in_work_queue);
+	destroy_workqueue(cx->out_work_queue);
 
 	cx18_streams_cleanup(cx, 1);
 
diff --git a/drivers/media/video/cx18/cx18-driver.h b/drivers/media/video/cx18/cx18-driver.h
index e6f42d0..62dca43 100644
--- a/drivers/media/video/cx18/cx18-driver.h
+++ b/drivers/media/video/cx18/cx18-driver.h
@@ -254,6 +254,7 @@
 #define CX18_F_S_INTERNAL_USE	5	/* this stream is used internally (sliced VBI processing) */
 #define CX18_F_S_STREAMOFF	7	/* signal end of stream EOS */
 #define CX18_F_S_APPL_IO        8	/* this stream is used read/written by an application */
+#define CX18_F_S_STOPPING	9	/* telling the fw to stop capturing */
 
 /* per-cx18, i_flags */
 #define CX18_F_I_LOADED_FW		0 	/* Loaded firmware 1st time */
@@ -324,6 +325,33 @@
 	char *str;
 };
 
+/*
+ * There are 2 types of deferrable tasks that send messages out to the firmware:
+ * 1. Sending individual buffers back to the firmware
+ * 2. Sending as many free buffers for a stream from q_free as we can to the fw
+ *
+ * The worst case scenario for multiple simultaneous streams is
+ * TS, YUV, PCM, VBI, MPEG, and IDX all going at once.
+ *
+ * We try to load the firmware queue with as many free buffers as possible,
+ * whenever we get a buffer back for a stream.  For the TS we return the single
+ * buffer to the firmware at that time as well.  For all other streams, we
+ * return single buffers to the firmware as the application drains them.
+ *
+ * 6 streams * 2 sets of orders * (1 single buf + 1 load fw from q_free)
+ * = 24 work orders should cover our needs, provided the applications read
+ * at a fairly steady rate.  If apps don't, we fall back to non-deferred
+ * operation, when no cx18_out_work_orders are available for use.
+ */
+#define CX18_MAX_OUT_WORK_ORDERS (24)
+
+struct cx18_out_work_order {
+	struct work_struct work; /* item queued on cx->out_work_queue */
+	atomic_t pending; /* 1 while the order is claimed, 0 when free */
+	struct cx18_stream *s; /* stream the deferred request is for */
+	struct cx18_buffer *buf; /* buf == NULL, means load fw from q_free */
+};
+
 #define CX18_INVALID_TASK_HANDLE 0xffffffff
 
 struct cx18_stream {
@@ -573,6 +601,10 @@
 	struct cx18_in_work_order in_work_order[CX18_MAX_IN_WORK_ORDERS];
 	char epu_debug_str[256]; /* CX18_EPU_DEBUG is rare: use shared space */
 
+	struct workqueue_struct *out_work_queue;
+	char out_workq_name[12]; /* "cx18-NN-out" */
+	struct cx18_out_work_order out_work_order[CX18_MAX_OUT_WORK_ORDERS];
+
 	/* i2c */
 	struct i2c_adapter i2c_adap[2];
 	struct i2c_algo_bit_data i2c_algo[2];
diff --git a/drivers/media/video/cx18/cx18-streams.c b/drivers/media/video/cx18/cx18-streams.c
index 0932b76..bbeb01c 100644
--- a/drivers/media/video/cx18/cx18-streams.c
+++ b/drivers/media/video/cx18/cx18-streams.c
@@ -431,14 +431,16 @@
 	cx18_api(cx, CX18_CPU_SET_RAW_VBI_PARAM, 6, data);
 }
 
-struct cx18_queue *cx18_stream_put_buf_fw(struct cx18_stream *s,
-					  struct cx18_buffer *buf)
+static
+struct cx18_queue *_cx18_stream_put_buf_fw(struct cx18_stream *s,
+					   struct cx18_buffer *buf)
 {
 	struct cx18 *cx = s->cx;
 	struct cx18_queue *q;
 
 	/* Don't give it to the firmware, if we're not running a capture */
 	if (s->handle == CX18_INVALID_TASK_HANDLE ||
+	    test_bit(CX18_F_S_STOPPING, &s->s_flags) ||
 	    !test_bit(CX18_F_S_STREAMING, &s->s_flags))
 		return cx18_enqueue(s, buf, &s->q_free);
 
@@ -453,7 +455,8 @@
 	return q;
 }
 
-void cx18_stream_load_fw_queue(struct cx18_stream *s)
+static
+void _cx18_stream_load_fw_queue(struct cx18_stream *s)
 {
 	struct cx18_queue *q;
 	struct cx18_buffer *buf;
@@ -467,11 +470,93 @@
 		buf = cx18_dequeue(s, &s->q_free);
 		if (buf == NULL)
 			break;
-		q = cx18_stream_put_buf_fw(s, buf);
+		q = _cx18_stream_put_buf_fw(s, buf);
 	} while (atomic_read(&s->q_busy.buffers) < CX18_MAX_FW_MDLS_PER_STREAM
 		 && q == &s->q_busy);
 }
 
+/* Release an outgoing work order so alloc_out_work_order() can claim it */
+static inline void free_out_work_order(struct cx18_out_work_order *order)
+{
+	atomic_set(&order->pending, 0);
+}
+
+void cx18_out_work_handler(struct work_struct *work)
+{
+	struct cx18_out_work_order *order =
+			container_of(work, struct cx18_out_work_order, work);
+	struct cx18_stream *stream = order->s;
+	struct cx18_buffer *fw_buf = order->buf;
+
+	/* The request is copied out above, so the order form can be reused */
+	free_out_work_order(order);
+	if (fw_buf != NULL)
+		_cx18_stream_put_buf_fw(stream, fw_buf);
+	else
+		_cx18_stream_load_fw_queue(stream);
+}
+
+/*
+ * Claim a free outgoing work order, or return NULL if none is available.
+ *
+ * We need "pending" to be atomic to inspect & set its contents in one step:
+ * 1. "pending" is only set to 1 here, but needs multiple access protection
+ * 2. work handler threads only clear "pending", and only on one particular
+ *    work order at a time, per handler thread.
+ */
+static
+struct cx18_out_work_order *alloc_out_work_order(struct cx18 *cx)
+{
+	struct cx18_out_work_order *order = cx->out_work_order;
+	struct cx18_out_work_order *end = order + CX18_MAX_OUT_WORK_ORDERS;
+
+	for (; order < end; order++) {
+		/* Non-zero means "pending" was not 1 and is now incremented */
+		if (atomic_add_unless(&order->pending, 1, 1))
+			return order;
+	}
+	return NULL;
+}
+
+/* Defer handing buf to the firmware; returns NULL when the work is queued */
+struct cx18_queue *cx18_stream_put_buf_fw(struct cx18_stream *s,
+					  struct cx18_buffer *buf)
+{
+	struct cx18 *cx = s->cx;
+	struct cx18_out_work_order *order = alloc_out_work_order(cx);
+
+	if (order != NULL) {
+		order->s = s;
+		order->buf = buf;
+		queue_work(cx->out_work_queue, &order->work);
+		return NULL;
+	}
+	CX18_DEBUG_WARN("No blank, outgoing-mailbox, deferred-work, "
+			"order forms available; sending buffer %u back "
+			"to the firmware immediately for stream %s\n",
+			buf->id, s->name);
+	return _cx18_stream_put_buf_fw(s, buf);
+}
+
+/* Defer loading the firmware queue; load it now if no order form is free */
+void cx18_stream_load_fw_queue(struct cx18_stream *s)
+{
+	struct cx18 *cx = s->cx;
+	struct cx18_out_work_order *order = alloc_out_work_order(cx);
+
+	if (order != NULL) {
+		order->s = s;
+		order->buf = NULL; /* Indicates to load the fw queue */
+		queue_work(cx->out_work_queue, &order->work);
+		return;
+	}
+	CX18_DEBUG_WARN("No blank, outgoing-mailbox, deferred-work, "
+			"order forms available; filling the firmware "
+			"buffer queue immediately for stream %s\n",
+			s->name);
+	_cx18_stream_load_fw_queue(s);
+}
+
 int cx18_start_v4l2_encode_stream(struct cx18_stream *s)
 {
 	u32 data[MAX_MB_ARGUMENTS];
@@ -607,12 +692,13 @@
 		cx18_writel(cx, s->buf_size, &cx->scb->cpu_mdl[buf->id].length);
 	}
 	mutex_unlock(&s->qlock);
-	cx18_stream_load_fw_queue(s);
+	_cx18_stream_load_fw_queue(s);
 
 	/* begin_capture */
 	if (cx18_vapi(cx, CX18_CPU_CAPTURE_START, 1, s->handle)) {
 		CX18_DEBUG_WARN("Error starting capture!\n");
 		/* Ensure we're really not capturing before releasing MDLs */
+		set_bit(CX18_F_S_STOPPING, &s->s_flags);
 		if (s->type == CX18_ENC_STREAM_TYPE_MPG)
 			cx18_vapi(cx, CX18_CPU_CAPTURE_STOP, 2, s->handle, 1);
 		else
@@ -622,6 +708,7 @@
 		cx18_vapi(cx, CX18_CPU_DE_RELEASE_MDL, 1, s->handle);
 		cx18_vapi(cx, CX18_DESTROY_TASK, 1, s->handle);
 		s->handle = CX18_INVALID_TASK_HANDLE;
+		clear_bit(CX18_F_S_STOPPING, &s->s_flags);
 		if (atomic_read(&cx->tot_capturing) == 0) {
 			set_bit(CX18_F_I_EOS, &cx->i_flags);
 			cx18_write_reg(cx, 5, CX18_DSP0_INTERRUPT_MASK);
@@ -666,6 +753,7 @@
 	if (atomic_read(&cx->tot_capturing) == 0)
 		return 0;
 
+	set_bit(CX18_F_S_STOPPING, &s->s_flags);
 	if (s->type == CX18_ENC_STREAM_TYPE_MPG)
 		cx18_vapi(cx, CX18_CPU_CAPTURE_STOP, 2, s->handle, !gop_end);
 	else
@@ -689,6 +777,7 @@
 
 	cx18_vapi(cx, CX18_DESTROY_TASK, 1, s->handle);
 	s->handle = CX18_INVALID_TASK_HANDLE;
+	clear_bit(CX18_F_S_STOPPING, &s->s_flags);
 
 	if (atomic_read(&cx->tot_capturing) > 0)
 		return 0;
diff --git a/drivers/media/video/cx18/cx18-streams.h b/drivers/media/video/cx18/cx18-streams.h
index 420e0a1..1fdcfff 100644
--- a/drivers/media/video/cx18/cx18-streams.h
+++ b/drivers/media/video/cx18/cx18-streams.h
@@ -28,10 +28,13 @@
 int cx18_streams_register(struct cx18 *cx);
 void cx18_streams_cleanup(struct cx18 *cx, int unregister);
 
-/* Capture related */
+/* Related to submission of buffers to firmware */
 void cx18_stream_load_fw_queue(struct cx18_stream *s);
 struct cx18_queue *cx18_stream_put_buf_fw(struct cx18_stream *s,
 					  struct cx18_buffer *buf);
+void cx18_out_work_handler(struct work_struct *work);
+
+/* Capture related */
 int cx18_start_v4l2_encode_stream(struct cx18_stream *s);
 int cx18_stop_v4l2_encode_stream(struct cx18_stream *s, int gop_end);