[media] media: coda: allocate internal framebuffers separately from v4l2 buffers

Some codecs running on CODA need internal framebuffers for reference and
reconstructed frames. Allocate them separately, and do not use the input
vb2_buffers: those will be handed off to userspace regularly, and there
is no way to signal to the CODA which of the registered framebuffers are
off limits. As a consequence, userspace is now free to choose the number
of v4l2 buffers.
This patch also includes the code to set up the parameter buffer for
CODA7 and above with 64-bit AXI bus width.

Signed-off-by: Philipp Zabel <p.zabel@pengutronix.de>
Reviewed-by: Javier Martin <javier.martin@vista-silicon.com>
Tested-by: Javier Martin <javier.martin@vista-silicon.com>
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
diff --git a/drivers/media/platform/coda.c b/drivers/media/platform/coda.c
index 53816d0..9223693b 100644
--- a/drivers/media/platform/coda.c
+++ b/drivers/media/platform/coda.c
@@ -45,8 +45,7 @@
 #define CODA_ISRAM_SIZE	(2048 * 2)
 #define CODA7_IRAM_SIZE		0x14000 /* 81920 bytes */
 
-#define CODA_OUTPUT_BUFS	4
-#define CODA_CAPTURE_BUFS	2
+#define CODA_MAX_FRAMEBUFFERS	2
 
 #define MAX_W		720
 #define MAX_H		576
@@ -164,11 +163,12 @@
 	struct v4l2_m2m_ctx		*m2m_ctx;
 	struct v4l2_ctrl_handler	ctrls;
 	struct v4l2_fh			fh;
-	struct vb2_buffer		*reference;
 	int				gopcounter;
 	char				vpu_header[3][64];
 	int				vpu_header_size[3];
 	struct coda_aux_buf		parabuf;
+	struct coda_aux_buf		internal_frames[CODA_MAX_FRAMEBUFFERS];
+	int				num_internal_frames;
 	int				idx;
 };
 
@@ -743,14 +743,6 @@
 		return 0;
 	}
 
-	/* For P frames a reference picture is needed too */
-	if ((ctx->gopcounter != (ctx->params.gop_size - 1)) &&
-	   !ctx->reference) {
-		v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
-			 "not ready: reference picture not available.\n");
-		return 0;
-	}
-
 	if (coda_isbusy(ctx->dev)) {
 		v4l2_dbg(1, coda_debug, &ctx->dev->v4l2_dev,
 			 "not ready: coda is still busy.\n");
@@ -804,7 +796,6 @@
 	ctx->params.codec_mode = CODA_MODE_INVALID;
 	ctx->colorspace = V4L2_COLORSPACE_REC709;
 	ctx->params.framerate = 30;
-	ctx->reference = NULL;
 	ctx->aborting = 0;
 
 	/* Default formats for output and input queues */
@@ -830,7 +821,6 @@
 	unsigned int size;
 
 	if (vq->type == V4L2_BUF_TYPE_VIDEO_OUTPUT) {
-		*nbuffers = CODA_OUTPUT_BUFS;
 		if (fmt)
 			size = fmt->fmt.pix.width *
 				fmt->fmt.pix.height * 3 / 2;
@@ -838,7 +828,6 @@
 			size = MAX_W *
 				MAX_H * 3 / 2;
 	} else {
-		*nbuffers = CODA_CAPTURE_BUFS;
 		size = CODA_MAX_FRAME_SIZE;
 	}
 
@@ -891,6 +880,77 @@
 	coda_lock(ctx);
 }
 
+static void coda_free_framebuffers(struct coda_ctx *ctx)
+{
+	int i;
+	/* Release every internal framebuffer slot of ctx that currently holds an allocation. */
+	for (i = 0; i < CODA_MAX_FRAMEBUFFERS; i++) {
+		if (ctx->internal_frames[i].vaddr) { /* slots with a NULL vaddr are skipped */
+			dma_free_coherent(&ctx->dev->plat_dev->dev,
+				ctx->internal_frames[i].size,
+				ctx->internal_frames[i].vaddr,
+				ctx->internal_frames[i].paddr);
+			ctx->internal_frames[i].vaddr = NULL; /* mark the slot empty so a later call does not free it again */
+		}
+	}
+}
+
+static int coda_alloc_framebuffers(struct coda_ctx *ctx, struct coda_q_data *q_data, u32 fourcc)
+{
+	struct coda_dev *dev = ctx->dev;
+	/* Allocates the internal framebuffers and lists their plane addresses in ctx->parabuf; returns 0 or -ENOMEM. */
+	int height = q_data->height;
+	int width = q_data->width;
+	u32 *p;
+	int i;
+
+	/* Allocate CODA_MAX_FRAMEBUFFERS DMA-coherent buffers of q_data->sizeimage bytes; H.264 on non-DX6 adds width / 2 * height / 2 bytes */
+	ctx->num_internal_frames = CODA_MAX_FRAMEBUFFERS;
+	for (i = 0; i < ctx->num_internal_frames; i++) {
+		ctx->internal_frames[i].size = q_data->sizeimage;
+		if (fourcc == V4L2_PIX_FMT_H264 && dev->devtype->product != CODA_DX6)
+			ctx->internal_frames[i].size += width / 2 * height / 2;
+		ctx->internal_frames[i].vaddr = dma_alloc_coherent(
+				&dev->plat_dev->dev, ctx->internal_frames[i].size,
+				&ctx->internal_frames[i].paddr, GFP_KERNEL);
+		if (!ctx->internal_frames[i].vaddr) {
+			coda_free_framebuffers(ctx); /* release the buffers allocated so far */
+			return -ENOMEM;
+		}
+	}
+
+	/* Register frame buffers in the parameter buffer: per frame the Y address, Cb = Y + width * height, Cr = Cb + width / 2 * height / 2 */
+	p = ctx->parabuf.vaddr;
+
+	if (dev->devtype->product == CODA_DX6) { /* three consecutive words per frame: Y, Cb, Cr */
+		for (i = 0; i < ctx->num_internal_frames; i++) {
+			p[i * 3] = ctx->internal_frames[i].paddr; /* Y */
+			p[i * 3 + 1] = p[i * 3] + width * height; /* Cb */
+			p[i * 3 + 2] = p[i * 3 + 1] + width / 2 * height / 2; /* Cr */
+		}
+	} else { /* two frames per iteration; adjacent 32-bit words are stored pair-swapped vs. the DX6 order. NOTE(review): presumably for the 64-bit AXI bus named in the commit message - confirm */
+		for (i = 0; i < ctx->num_internal_frames; i += 2) {
+			p[i * 3 + 1] = ctx->internal_frames[i].paddr; /* Y */
+			p[i * 3] = p[i * 3 + 1] + width * height; /* Cb */
+			p[i * 3 + 3] = p[i * 3] + (width / 2) * (height / 2); /* Cr */
+
+			if (fourcc == V4L2_PIX_FMT_H264) /* address just past the Cr plane, stored from word 96 on (also pair-swapped); purpose not visible here */
+				p[96 + i + 1] = p[i * 3 + 3] + (width / 2) * (height / 2);
+
+			if (i + 1 < ctx->num_internal_frames) { /* second frame of the pair, if there is one */
+				p[i * 3 + 2] = ctx->internal_frames[i+1].paddr; /* Y */
+				p[i * 3 + 5] = p[i * 3 + 2] + width * height ; /* Cb */
+				p[i * 3 + 4] = p[i * 3 + 5] + (width / 2) * (height / 2); /* Cr */
+
+				if (fourcc == V4L2_PIX_FMT_H264)
+					p[96 + i] = p[i * 3 + 4] + (width / 2) * (height / 2);
+			}
+		}
+	}
+
+	return 0;
+}
+
 static int coda_start_streaming(struct vb2_queue *q, unsigned int count)
 {
 	struct coda_ctx *ctx = vb2_get_drv_priv(q);
@@ -898,11 +958,10 @@
 	u32 bitstream_buf, bitstream_size;
 	struct coda_dev *dev = ctx->dev;
 	struct coda_q_data *q_data_src, *q_data_dst;
-	u32 dst_fourcc;
 	struct vb2_buffer *buf;
-	struct vb2_queue *src_vq;
+	u32 dst_fourcc;
 	u32 value;
-	int i = 0;
+	int ret;
 
 	if (count < 1)
 		return -EINVAL;
@@ -1050,25 +1109,11 @@
 	if (coda_read(dev, CODA_RET_ENC_SEQ_SUCCESS) == 0)
 		return -EFAULT;
 
-	/*
-	 * Walk the src buffer list and let the codec know the
-	 * addresses of the pictures.
-	 */
-	src_vq = v4l2_m2m_get_vq(ctx->m2m_ctx, V4L2_BUF_TYPE_VIDEO_OUTPUT);
-	for (i = 0; i < src_vq->num_buffers; i++) {
-		u32 *p;
+	ret = coda_alloc_framebuffers(ctx, q_data_src, dst_fourcc);
+	if (ret < 0)
+		return ret;
 
-		buf = src_vq->bufs[i];
-		p = ctx->parabuf.vaddr;
-
-		p[i * 3] = vb2_dma_contig_plane_dma_addr(buf, 0);
-		p[i * 3 + 1] = p[i * 3] + q_data_src->width *
-				q_data_src->height;
-		p[i * 3 + 2] = p[i * 3 + 1] + q_data_src->width / 2 *
-				q_data_src->height / 2;
-	}
-
-	coda_write(dev, src_vq->num_buffers, CODA_CMD_SET_FRAME_BUF_NUM);
+	coda_write(dev, ctx->num_internal_frames, CODA_CMD_SET_FRAME_BUF_NUM);
 	coda_write(dev, round_up(q_data_src->width, 8), CODA_CMD_SET_FRAME_BUF_STRIDE);
 	if (dev->devtype->product != CODA_DX6) {
 		coda_write(dev, round_up(q_data_src->width, 8), CODA7_CMD_SET_FRAME_SOURCE_BUF_STRIDE);
@@ -1191,6 +1236,8 @@
 				 "CODA_COMMAND_SEQ_END failed\n");
 			return -ETIMEDOUT;
 		}
+
+		coda_free_framebuffers(ctx);
 	}
 
 	return 0;
@@ -1437,7 +1484,7 @@
 
 static irqreturn_t coda_irq_handler(int irq, void *data)
 {
-	struct vb2_buffer *src_buf, *dst_buf, *tmp_buf;
+	struct vb2_buffer *src_buf, *dst_buf;
 	struct coda_dev *dev = data;
 	u32 wr_ptr, start_ptr;
 	struct coda_ctx *ctx;
@@ -1465,8 +1512,8 @@
 		return IRQ_NONE;
 	}
 
-	src_buf = v4l2_m2m_next_src_buf(ctx->m2m_ctx);
-	dst_buf = v4l2_m2m_next_dst_buf(ctx->m2m_ctx);
+	src_buf = v4l2_m2m_src_buf_remove(ctx->m2m_ctx);
+	dst_buf = v4l2_m2m_dst_buf_remove(ctx->m2m_ctx);
 
 	/* Get results from the coda */
 	coda_read(dev, CODA_RET_ENC_PIC_TYPE);
@@ -1496,23 +1543,7 @@
 		dst_buf->v4l2_buf.flags &= ~V4L2_BUF_FLAG_KEYFRAME;
 	}
 
-	/* Free previous reference picture if available */
-	if (ctx->reference) {
-		v4l2_m2m_buf_done(ctx->reference, VB2_BUF_STATE_DONE);
-		ctx->reference = NULL;
-	}
-
-	/*
-	 * For the last frame of the gop we don't need to save
-	 * a reference picture.
-	 */
-	v4l2_m2m_dst_buf_remove(ctx->m2m_ctx);
-	tmp_buf = v4l2_m2m_src_buf_remove(ctx->m2m_ctx);
-	if (ctx->gopcounter == 0)
-		v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
-	else
-		ctx->reference = tmp_buf;
-
+	v4l2_m2m_buf_done(src_buf, VB2_BUF_STATE_DONE);
 	v4l2_m2m_buf_done(dst_buf, VB2_BUF_STATE_DONE);
 
 	ctx->gopcounter--;