drm/radeon/kms: don't require up to 64k allocations. (v2)

This avoids needing a kmalloc larger than PAGE_SIZE for the main
indirect buffer chunk: it adds an accessor for all reads from the
chunk, and caches a single page at a time for subsequent reads.
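
The accessor ends up shaped roughly like the sketch below (illustrative
only: the helper itself lands outside the radeon_cs.c hunks quoted
here, and error handling is reduced to the parser_error flag):

  static inline uint32_t radeon_get_ib_value(struct radeon_cs_parser *p, int idx)
  {
      struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
      u32 pg_idx = (idx * 4) / PAGE_SIZE;
      u32 pg_offset = (idx * 4) % PAGE_SIZE;

      /* fast path: the dword is already in one of the two cached pages */
      if (ibc->kpage_idx[0] == pg_idx)
          return ibc->kpage[0][pg_offset / 4];
      if (ibc->kpage_idx[1] == pg_idx)
          return ibc->kpage[1][pg_offset / 4];

      /* slow path: copy the page in from userspace, reusing the pool
       * slot holding the older page; on a fault p->parser_error is
       * set and the dword returned here is meaningless */
      return ibc->kpage[radeon_cs_update_pages(p, pg_idx)][pg_offset / 4];
  }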

changes since v1:
- use a two-page pool; that should cover the most common way a single
  packet spanning > PAGE_SIZE would be hit, though I'm having trouble
  seeing anywhere we currently generate anything like that.
- hopefully the short page at the end of the chunk is now copied
  properly.
- added a parser_error flag so that errors deep in the parser are
  recorded once, instead of having to test every ib value fetch.
- fixed a bug in the version of the patch that went to the list.

Signed-off-by: Dave Airlie <airlied@redhat.com>
diff --git a/drivers/gpu/drm/radeon/radeon_cs.c b/drivers/gpu/drm/radeon/radeon_cs.c
index 12f5990..dea8acf 100644
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@@ -142,15 +142,33 @@
 		}
 
 		p->chunks[i].length_dw = user_chunk.length_dw;
-		cdata = (uint32_t *)(unsigned long)user_chunk.chunk_data;
+		p->chunks[i].user_ptr = (void __user *)(unsigned long)user_chunk.chunk_data;
 
-		size = p->chunks[i].length_dw * sizeof(uint32_t);
-		p->chunks[i].kdata = kmalloc(size, GFP_KERNEL);
-		if (p->chunks[i].kdata == NULL) {
-			return -ENOMEM;
-		}
-		if (DRM_COPY_FROM_USER(p->chunks[i].kdata, cdata, size)) {
-			return -EFAULT;
+		cdata = (uint32_t *)(unsigned long)user_chunk.chunk_data;
+		if (p->chunks[i].chunk_id != RADEON_CHUNK_ID_IB) {
+			size = p->chunks[i].length_dw * sizeof(uint32_t);
+			p->chunks[i].kdata = kmalloc(size, GFP_KERNEL);
+			if (p->chunks[i].kdata == NULL) {
+				return -ENOMEM;
+			}
+			if (DRM_COPY_FROM_USER(p->chunks[i].kdata,
+					       p->chunks[i].user_ptr, size)) {
+				return -EFAULT;
+			}
+		} else {
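+			/* the IB chunk is streamed through a two-page pool */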
+			p->chunks[i].kpage[0] = kmalloc(PAGE_SIZE, GFP_KERNEL);
+			p->chunks[i].kpage[1] = kmalloc(PAGE_SIZE, GFP_KERNEL);
+			if (p->chunks[i].kpage[0] == NULL || p->chunks[i].kpage[1] == NULL) {
+				kfree(p->chunks[i].kpage[0]);
+				kfree(p->chunks[i].kpage[1]);
+				return -ENOMEM;
+			}
+			p->chunks[i].kpage_idx[0] = -1;
+			p->chunks[i].kpage_idx[1] = -1;
+			p->chunks[i].last_copied_page = -1;
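+			/* index of the last, possibly partial, page of the chunk */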
+			p->chunks[i].last_page_index = ((p->chunks[i].length_dw * 4) - 1) / PAGE_SIZE;
 		}
 	}
 	if (p->chunks[p->chunk_ib_idx].length_dw > (16 * 1024)) {
@@ -190,6 +206,8 @@
 	kfree(parser->relocs_ptr);
 	for (i = 0; i < parser->nchunks; i++) {
 		kfree(parser->chunks[i].kdata);
+		kfree(parser->chunks[i].kpage[0]);
+		kfree(parser->chunks[i].kpage[1]);
 	}
 	kfree(parser->chunks);
 	kfree(parser->chunks_array);
@@ -238,8 +256,16 @@
 	 * uncached). */
 	ib_chunk = &parser.chunks[parser.chunk_ib_idx];
 	parser.ib->length_dw = ib_chunk->length_dw;
-	memcpy((void *)parser.ib->ptr, ib_chunk->kdata, ib_chunk->length_dw*4);
 	r = radeon_cs_parse(&parser);
+	if (r || parser.parser_error) {
+		if (!r)
+			r = parser.parser_error;
+		DRM_ERROR("Invalid command stream !\n");
+		radeon_cs_parser_fini(&parser, r);
+		mutex_unlock(&rdev->cs_mutex);
+		return r;
+	}
+	r = radeon_cs_finish_pages(&parser);
 	if (r) {
 		DRM_ERROR("Invalid command stream !\n");
 		radeon_cs_parser_fini(&parser, r);
@@ -254,3 +278,76 @@
 	mutex_unlock(&rdev->cs_mutex);
 	return r;
 }
+
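+/* After parsing, copy into the IB any remaining pages of the chunk
+ * that the parser never pulled through the page pool, including the
+ * (possibly short) final page.
+ */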
+int radeon_cs_finish_pages(struct radeon_cs_parser *p)
+{
+	struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
+	int i;
+	int size = PAGE_SIZE;
+
+	for (i = ibc->last_copied_page + 1; i <= ibc->last_page_index; i++) {
+		if (i == ibc->last_page_index) {
+			size = (ibc->length_dw * 4) % PAGE_SIZE;
+			if (size == 0)
+				size = PAGE_SIZE;
+		}
+
+		if (DRM_COPY_FROM_USER(p->ib->ptr + (i * (PAGE_SIZE/4)),
+				       ibc->user_ptr + (i * PAGE_SIZE),
+				       size))
+			return -EFAULT;
+	}
+	return 0;
+}
+
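+/* Copy the page at pg_idx in from userspace, caching it in one of the
+ * two pool pages and appending it to the IB; any intermediate pages
+ * that were skipped are copied straight into the IB.  Returns the pool
+ * slot that now holds pg_idx (on a faulting copy, parser_error is set
+ * and slot 0 is returned).
+ */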
+int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx)
+{
+	int new_page;
+	struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
+	int i;
+	int size = PAGE_SIZE;
+
+	/* first copy any whole pages we skipped over straight into the IB */
+	for (i = ibc->last_copied_page + 1; i < pg_idx; i++) {
+		if (DRM_COPY_FROM_USER(p->ib->ptr + (i * (PAGE_SIZE/4)),
+				       ibc->user_ptr + (i * PAGE_SIZE),
+				       PAGE_SIZE)) {
+			p->parser_error = -EFAULT;
+			return 0;
+		}
+	}
+
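+	/* reuse the pool slot whose cached page has the lower index */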
+	new_page = ibc->kpage_idx[0] < ibc->kpage_idx[1] ? 0 : 1;
+
+	if (pg_idx == ibc->last_page_index) {
+		size = (ibc->length_dw * 4) % PAGE_SIZE;
+		if (size == 0)
+			size = PAGE_SIZE;
+	}
+
+	if (DRM_COPY_FROM_USER(ibc->kpage[new_page],
+			       ibc->user_ptr + (pg_idx * PAGE_SIZE),
+			       size)) {
+		p->parser_error = -EFAULT;
+		return 0;
+	}
+
+	/* and copy the freshly cached page into the IB as well */
+	memcpy((void *)(p->ib->ptr + (pg_idx * (PAGE_SIZE/4))), ibc->kpage[new_page], size);
+
+	ibc->last_copied_page = pg_idx;
+	ibc->kpage_idx[new_page] = pg_idx;
+
+	return new_page;
+}
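
As a worked example of the short-page handling above (assuming 4 KiB
pages, with a hypothetical chunk of length_dw = 1200): the chunk is
4800 bytes, so last_page_index = (4800 - 1) / 4096 = 1, and the final
copy is 4800 % 4096 = 704 bytes rather than a full PAGE_SIZE.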