drm/radeon/kms: don't require up to 64k allocations. (v2)

This avoids needing to do a kmalloc > PAGE_SIZE for the main
indirect buffer chunk, it adds an accessor for all reads from
the chunk and caches a single page at a time for subsequent
reads.

changes since v1:
Use a two page pool which should be the most common case
a single packet spanning > PAGE_SIZE will be hit, but I'm
having trouble seeing anywhere we currently generate anything like that.
hopefully proper short page copying at end
added parser_error flag to set deep errors instead of having to test
every ib value fetch.
fixed bug in patch that went to list.

Signed-off-by: Dave Airlie <airlied@redhat.com>
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index 33b89cd..c629b5a 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -57,7 +57,7 @@
 			  idx, ib_chunk->length_dw);
 		return -EINVAL;
 	}
-	header = ib_chunk->kdata[idx];
+	header = radeon_get_ib_value(p, idx);
 	pkt->idx = idx;
 	pkt->type = CP_PACKET_GET_TYPE(header);
 	pkt->count = CP_PACKET_GET_COUNT(header);
@@ -98,7 +98,6 @@
 static int r600_cs_packet_next_reloc_mm(struct radeon_cs_parser *p,
 					struct radeon_cs_reloc **cs_reloc)
 {
-	struct radeon_cs_chunk *ib_chunk;
 	struct radeon_cs_chunk *relocs_chunk;
 	struct radeon_cs_packet p3reloc;
 	unsigned idx;
@@ -109,7 +108,6 @@
 		return -EINVAL;
 	}
 	*cs_reloc = NULL;
-	ib_chunk = &p->chunks[p->chunk_ib_idx];
 	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
 	r = r600_cs_packet_parse(p, &p3reloc, p->idx);
 	if (r) {
@@ -121,7 +119,7 @@
 			  p3reloc.idx);
 		return -EINVAL;
 	}
-	idx = ib_chunk->kdata[p3reloc.idx + 1];
+	idx = radeon_get_ib_value(p, p3reloc.idx + 1);
 	if (idx >= relocs_chunk->length_dw) {
 		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
 			  idx, relocs_chunk->length_dw);
@@ -146,7 +144,6 @@
 static int r600_cs_packet_next_reloc_nomm(struct radeon_cs_parser *p,
 					struct radeon_cs_reloc **cs_reloc)
 {
-	struct radeon_cs_chunk *ib_chunk;
 	struct radeon_cs_chunk *relocs_chunk;
 	struct radeon_cs_packet p3reloc;
 	unsigned idx;
@@ -157,7 +154,6 @@
 		return -EINVAL;
 	}
 	*cs_reloc = NULL;
-	ib_chunk = &p->chunks[p->chunk_ib_idx];
 	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
 	r = r600_cs_packet_parse(p, &p3reloc, p->idx);
 	if (r) {
@@ -169,7 +165,7 @@
 			  p3reloc.idx);
 		return -EINVAL;
 	}
-	idx = ib_chunk->kdata[p3reloc.idx + 1];
+	idx = radeon_get_ib_value(p, p3reloc.idx + 1);
 	if (idx >= relocs_chunk->length_dw) {
 		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
 			  idx, relocs_chunk->length_dw);
@@ -218,7 +214,6 @@
 static int r600_packet3_check(struct radeon_cs_parser *p,
 				struct radeon_cs_packet *pkt)
 {
-	struct radeon_cs_chunk *ib_chunk;
 	struct radeon_cs_reloc *reloc;
 	volatile u32 *ib;
 	unsigned idx;
@@ -227,8 +222,8 @@
 	int r;
 
 	ib = p->ib->ptr;
-	ib_chunk = &p->chunks[p->chunk_ib_idx];
 	idx = pkt->idx + 1;
+
 	switch (pkt->opcode) {
 	case PACKET3_START_3D_CMDBUF:
 		if (p->family >= CHIP_RV770 || pkt->count) {
@@ -281,7 +276,7 @@
 			return -EINVAL;
 		}
 		/* bit 4 is reg (0) or mem (1) */
-		if (ib_chunk->kdata[idx+0] & 0x10) {
+		if (radeon_get_ib_value(p, idx) & 0x10) {
 			r = r600_cs_packet_next_reloc(p, &reloc);
 			if (r) {
 				DRM_ERROR("bad WAIT_REG_MEM\n");
@@ -297,8 +292,8 @@
 			return -EINVAL;
 		}
 		/* 0xffffffff/0x0 is flush all cache flag */
-		if (ib_chunk->kdata[idx+1] != 0xffffffff ||
-		    ib_chunk->kdata[idx+2] != 0) {
+		if (radeon_get_ib_value(p, idx + 1) != 0xffffffff ||
+		    radeon_get_ib_value(p, idx + 2) != 0) {
 			r = r600_cs_packet_next_reloc(p, &reloc);
 			if (r) {
 				DRM_ERROR("bad SURFACE_SYNC\n");
@@ -639,7 +634,6 @@
 	 * uncached). */
 	ib_chunk = &parser.chunks[parser.chunk_ib_idx];
 	parser.ib->length_dw = ib_chunk->length_dw;
-	memcpy((void *)parser.ib->ptr, ib_chunk->kdata, ib_chunk->length_dw*4);
 	*l = parser.ib->length_dw;
 	r = r600_cs_parse(&parser);
 	if (r) {
@@ -647,6 +641,12 @@
 		r600_cs_parser_fini(&parser, r);
 		return r;
 	}
+	r = radeon_cs_finish_pages(&parser);
+	if (r) {
+		DRM_ERROR("Invalid command stream !\n");
+		r600_cs_parser_fini(&parser, r);
+		return r;
+	}
 	r600_cs_parser_fini(&parser, r);
 	return r;
 }