drm/radeon: add support for evergreen/ni tiling informations v11

evergreen and northern island gpu needs more informations for 2D tiling
than previous r6xx/r7xx. Add field to tiling ioctl to allow userspace
to provide those.

The v8 cs checking change to track color view on r6xx/r7xx doesn't
affect old userspace as old userspace always emited 0 for this register.

v2 fix r6xx/r7xx 2D tiling computation
v3 fix r6xx/r7xx height align for untiled surface & add support for
   tile split on evergreen and newer
v4 improve tiling debugging output
v5 fix tile split code for evergreen and newer
v6 set proper tile split for crtc register
v7 fix tile split limit value
v8 add COLOR_VIEW checking to r6xx/r7xx checker, add evergreen cs
   checking, update safe reg for r600, evergreen and cayman.
   Evergreen checking need some work around for stencil alignment
   issues
v9 fix tile split value range, fix compressed texture handling and
   mipmap calculation, allow evergreen check to be silencious in
   front of current broken userspace (depth/stencil alignment issue)
v10 fix eg 3d texture and compressed texture, fix r600 depth array,
    fix r600 color view computation, add support for evergreen stencil
    split
v11 more verbose debugging in some case

Signed-off-by: Jerome Glisse <jglisse@redhat.com>
Signed-off-by: Dave Airlie <airlied@redhat.com>
diff --git a/drivers/gpu/drm/radeon/r600_cs.c b/drivers/gpu/drm/radeon/r600_cs.c
index 9f17571..5cbe948 100644
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@@ -55,6 +55,7 @@
 	struct radeon_bo	*cb_color_frag_bo[8];
 	struct radeon_bo	*cb_color_tile_bo[8];
 	u32			cb_color_info[8];
+	u32			cb_color_view[8];
 	u32			cb_color_size_idx[8];
 	u32			cb_target_mask;
 	u32			cb_shader_mask;
@@ -77,9 +78,9 @@
 
 #define FMT_8_BIT(fmt, vc)   [fmt] = { 1, 1, 1, vc, CHIP_R600 }
 #define FMT_16_BIT(fmt, vc)  [fmt] = { 1, 1, 2, vc, CHIP_R600 }
-#define FMT_24_BIT(fmt)      [fmt] = { 1, 1, 3,  0, CHIP_R600 }
+#define FMT_24_BIT(fmt)      [fmt] = { 1, 1, 4,  0, CHIP_R600 }
 #define FMT_32_BIT(fmt, vc)  [fmt] = { 1, 1, 4, vc, CHIP_R600 }
-#define FMT_48_BIT(fmt)      [fmt] = { 1, 1, 6,  0, CHIP_R600 }
+#define FMT_48_BIT(fmt)      [fmt] = { 1, 1, 8,  0, CHIP_R600 }
 #define FMT_64_BIT(fmt, vc)  [fmt] = { 1, 1, 8, vc, CHIP_R600 }
 #define FMT_96_BIT(fmt)      [fmt] = { 1, 1, 12, 0, CHIP_R600 }
 #define FMT_128_BIT(fmt, vc) [fmt] = { 1, 1, 16,vc, CHIP_R600 }
@@ -111,7 +112,7 @@
 
 	/* 24-bit */
 	FMT_24_BIT(V_038004_FMT_8_8_8),
-					       
+
 	/* 32-bit */
 	FMT_32_BIT(V_038004_COLOR_32, 1),
 	FMT_32_BIT(V_038004_COLOR_32_FLOAT, 1),
@@ -166,22 +167,22 @@
 	[V_038004_FMT_32_AS_32_32_32_32] = { 1, 1, 4, 0, CHIP_CEDAR},
 };
 
-static bool fmt_is_valid_color(u32 format)
+bool r600_fmt_is_valid_color(u32 format)
 {
 	if (format >= ARRAY_SIZE(color_formats_table))
 		return false;
-	
+
 	if (color_formats_table[format].valid_color)
 		return true;
 
 	return false;
 }
 
-static bool fmt_is_valid_texture(u32 format, enum radeon_family family)
+bool r600_fmt_is_valid_texture(u32 format, enum radeon_family family)
 {
 	if (format >= ARRAY_SIZE(color_formats_table))
 		return false;
-	
+
 	if (family < color_formats_table[format].min_family)
 		return false;
 
@@ -191,7 +192,7 @@
 	return false;
 }
 
-static int fmt_get_blocksize(u32 format)
+int r600_fmt_get_blocksize(u32 format)
 {
 	if (format >= ARRAY_SIZE(color_formats_table))
 		return 0;
@@ -199,7 +200,7 @@
 	return color_formats_table[format].blocksize;
 }
 
-static int fmt_get_nblocksx(u32 format, u32 w)
+int r600_fmt_get_nblocksx(u32 format, u32 w)
 {
 	unsigned bw;
 
@@ -213,7 +214,7 @@
 	return (w + bw - 1) / bw;
 }
 
-static int fmt_get_nblocksy(u32 format, u32 h)
+int r600_fmt_get_nblocksy(u32 format, u32 h)
 {
 	unsigned bh;
 
@@ -260,7 +261,7 @@
 		break;
 	case ARRAY_LINEAR_ALIGNED:
 		*pitch_align = max((u32)64, (u32)(values->group_size / values->blocksize));
-		*height_align = tile_height;
+		*height_align = 1;
 		*depth_align = 1;
 		*base_align = values->group_size;
 		break;
@@ -273,10 +274,9 @@
 		*base_align = values->group_size;
 		break;
 	case ARRAY_2D_TILED_THIN1:
-		*pitch_align = max((u32)macro_tile_width,
-				  (u32)(((values->group_size / tile_height) /
-					 (values->blocksize * values->nsamples)) *
-					values->nbanks)) * tile_width;
+		*pitch_align = max((u32)macro_tile_width * tile_width,
+				(u32)((values->group_size * values->nbanks) /
+				(values->blocksize * values->nsamples * tile_width)));
 		*height_align = macro_tile_height * tile_height;
 		*depth_align = 1;
 		*base_align = max(macro_tile_bytes,
@@ -300,6 +300,7 @@
 		track->cb_color_size[i] = 0;
 		track->cb_color_size_idx[i] = 0;
 		track->cb_color_info[i] = 0;
+		track->cb_color_view[i] = 0xFFFFFFFF;
 		track->cb_color_bo[i] = NULL;
 		track->cb_color_bo_offset[i] = 0xFFFFFFFF;
 		track->cb_color_bo_mc[i] = 0xFFFFFFFF;
@@ -333,13 +334,14 @@
 	volatile u32 *ib = p->ib->ptr;
 	unsigned array_mode;
 	u32 format;
+
 	if (G_0280A0_TILE_MODE(track->cb_color_info[i])) {
 		dev_warn(p->dev, "FMASK or CMASK buffer are not supported by this kernel\n");
 		return -EINVAL;
 	}
 	size = radeon_bo_size(track->cb_color_bo[i]) - track->cb_color_bo_offset[i];
 	format = G_0280A0_FORMAT(track->cb_color_info[i]);
-	if (!fmt_is_valid_color(format)) {
+	if (!r600_fmt_is_valid_color(format)) {
 		dev_warn(p->dev, "%s:%d cb invalid format %d for %d (0x%08X)\n",
 			 __func__, __LINE__, format,
 			i, track->cb_color_info[i]);
@@ -360,7 +362,7 @@
 	array_check.nbanks = track->nbanks;
 	array_check.npipes = track->npipes;
 	array_check.nsamples = track->nsamples;
-	array_check.blocksize = fmt_get_blocksize(format);
+	array_check.blocksize = r600_fmt_get_blocksize(format);
 	if (r600_get_array_mode_alignment(&array_check,
 					  &pitch_align, &height_align, &depth_align, &base_align)) {
 		dev_warn(p->dev, "%s invalid tiling %d for %d (0x%08X)\n", __func__,
@@ -404,7 +406,18 @@
 	}
 
 	/* check offset */
-	tmp = fmt_get_nblocksy(format, height) * fmt_get_nblocksx(format, pitch) * fmt_get_blocksize(format);
+	tmp = r600_fmt_get_nblocksy(format, height) * r600_fmt_get_nblocksx(format, pitch) * r600_fmt_get_blocksize(format);
+	switch (array_mode) {
+	default:
+	case V_0280A0_ARRAY_LINEAR_GENERAL:
+	case V_0280A0_ARRAY_LINEAR_ALIGNED:
+		tmp += track->cb_color_view[i] & 0xFF;
+		break;
+	case V_0280A0_ARRAY_1D_TILED_THIN1:
+	case V_0280A0_ARRAY_2D_TILED_THIN1:
+		tmp += G_028080_SLICE_MAX(track->cb_color_view[i]) * tmp;
+		break;
+	}
 	if ((tmp + track->cb_color_bo_offset[i]) > radeon_bo_size(track->cb_color_bo[i])) {
 		if (array_mode == V_0280A0_ARRAY_LINEAR_GENERAL) {
 			/* the initial DDX does bad things with the CB size occasionally */
@@ -414,10 +427,13 @@
 			 * broken userspace.
 			 */
 		} else {
-			dev_warn(p->dev, "%s offset[%d] %d %d %d %lu too big\n", __func__, i,
-				 array_mode,
+			dev_warn(p->dev, "%s offset[%d] %d %d %d %lu too big (%d %d) (%d %d %d)\n",
+				 __func__, i, array_mode,
 				 track->cb_color_bo_offset[i], tmp,
-				 radeon_bo_size(track->cb_color_bo[i]));
+				 radeon_bo_size(track->cb_color_bo[i]),
+				 pitch, height, r600_fmt_get_nblocksx(format, pitch),
+				 r600_fmt_get_nblocksy(format, height),
+				 r600_fmt_get_blocksize(format));
 			return -EINVAL;
 		}
 	}
@@ -1075,6 +1091,17 @@
 			track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
 		}
 		break;
+	case R_028080_CB_COLOR0_VIEW:
+	case R_028084_CB_COLOR1_VIEW:
+	case R_028088_CB_COLOR2_VIEW:
+	case R_02808C_CB_COLOR3_VIEW:
+	case R_028090_CB_COLOR4_VIEW:
+	case R_028094_CB_COLOR5_VIEW:
+	case R_028098_CB_COLOR6_VIEW:
+	case R_02809C_CB_COLOR7_VIEW:
+		tmp = (reg - R_028080_CB_COLOR0_VIEW) / 4;
+		track->cb_color_view[tmp] = radeon_get_ib_value(p, idx);
+		break;
 	case R_028060_CB_COLOR0_SIZE:
 	case R_028064_CB_COLOR1_SIZE:
 	case R_028068_CB_COLOR2_SIZE:
@@ -1259,7 +1286,7 @@
 	return 0;
 }
 
-static unsigned mip_minify(unsigned size, unsigned level)
+unsigned r600_mip_minify(unsigned size, unsigned level)
 {
 	unsigned val;
 
@@ -1281,22 +1308,22 @@
 	unsigned nlevels = llevel - blevel + 1;
 
 	*l0_size = -1;
-	blocksize = fmt_get_blocksize(format);
+	blocksize = r600_fmt_get_blocksize(format);
 
-	w0 = mip_minify(w0, 0);
-	h0 = mip_minify(h0, 0);
-	d0 = mip_minify(d0, 0);
+	w0 = r600_mip_minify(w0, 0);
+	h0 = r600_mip_minify(h0, 0);
+	d0 = r600_mip_minify(d0, 0);
 	for(i = 0, offset = 0, level = blevel; i < nlevels; i++, level++) {
-		width = mip_minify(w0, i);
-		nbx = fmt_get_nblocksx(format, width);
+		width = r600_mip_minify(w0, i);
+		nbx = r600_fmt_get_nblocksx(format, width);
 
 		nbx = round_up(nbx, block_align);
 
-		height = mip_minify(h0, i);
-		nby = fmt_get_nblocksy(format, height);
+		height = r600_mip_minify(h0, i);
+		nby = r600_fmt_get_nblocksy(format, height);
 		nby = round_up(nby, height_align);
 
-		depth = mip_minify(d0, i);
+		depth = r600_mip_minify(d0, i);
 
 		size = nbx * nby * blocksize;
 		if (nfaces)
@@ -1387,7 +1414,7 @@
 		return -EINVAL;
 	}
 	format = G_038004_DATA_FORMAT(word1);
-	if (!fmt_is_valid_texture(format, p->family)) {
+	if (!r600_fmt_is_valid_texture(format, p->family)) {
 		dev_warn(p->dev, "%s:%d texture invalid format %d\n",
 			 __func__, __LINE__, format);
 		return -EINVAL;
@@ -1400,7 +1427,7 @@
 	array_check.nbanks = track->nbanks;
 	array_check.npipes = track->npipes;
 	array_check.nsamples = 1;
-	array_check.blocksize = fmt_get_blocksize(format);
+	array_check.blocksize = r600_fmt_get_blocksize(format);
 	if (r600_get_array_mode_alignment(&array_check,
 					  &pitch_align, &height_align, &depth_align, &base_align)) {
 		dev_warn(p->dev, "%s:%d tex array mode (%d) invalid\n",
@@ -1433,6 +1460,10 @@
 	word1 = radeon_get_ib_value(p, idx + 5);
 	blevel = G_038010_BASE_LEVEL(word0);
 	llevel = G_038014_LAST_LEVEL(word1);
+	if (blevel > llevel) {
+		dev_warn(p->dev, "texture blevel %d > llevel %d\n",
+			 blevel, llevel);
+	}
 	if (array == 1) {
 		barray = G_038014_BASE_ARRAY(word1);
 		larray = G_038014_LAST_ARRAY(word1);
@@ -1444,8 +1475,10 @@
 			  &l0_size, &mipmap_size);
 	/* using get ib will give us the offset into the texture bo */
 	if ((l0_size + word2) > radeon_bo_size(texture)) {
-		dev_warn(p->dev, "texture bo too small (%d %d %d %d -> %d have %ld)\n",
-			w0, h0, format, word2, l0_size, radeon_bo_size(texture));
+		dev_warn(p->dev, "texture bo too small ((%d %d) (%d %d) %d %d %d -> %d have %ld)\n",
+			 w0, h0, pitch_align, height_align,
+			 array_check.array_mode, format, word2,
+			 l0_size, radeon_bo_size(texture));
 		dev_warn(p->dev, "alignments %d %d %d %lld\n", pitch, pitch_align, height_align, base_align);
 		return -EINVAL;
 	}