radeon: fix some hard lockups on r3/4/500s
This patch should fix hard lockup and convert them in
softlockup (ie you can ssh the box but the gpu is busted
and we are waiting in loop for it to come back to reason).
Signed-off-by: Dave Airlie <airlied@redhat.com>
diff --git a/drivers/gpu/drm/radeon/r300_cmdbuf.c b/drivers/gpu/drm/radeon/r300_cmdbuf.c
index 702df45..4b3bd63 100644
--- a/drivers/gpu/drm/radeon/r300_cmdbuf.c
+++ b/drivers/gpu/drm/radeon/r300_cmdbuf.c
@@ -136,6 +136,18 @@
ADVANCE_RING();
}
+ /* flus cache and wait idle clean after cliprect change */
+ BEGIN_RING(2);
+ OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
+ OUT_RING(R300_RB3D_DC_FLUSH);
+ ADVANCE_RING();
+ BEGIN_RING(2);
+ OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
+ OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
+ ADVANCE_RING();
+ /* set flush flag */
+ dev_priv->track_flush |= RADEON_FLUSH_EMITED;
+
return 0;
}
@@ -166,13 +178,13 @@
ADD_RANGE(0x21DC, 1);
ADD_RANGE(R300_VAP_UNKNOWN_221C, 1);
ADD_RANGE(R300_VAP_CLIP_X_0, 4);
- ADD_RANGE(R300_VAP_PVS_WAITIDLE, 1);
+ ADD_RANGE(R300_VAP_PVS_STATE_FLUSH_REG, 1);
ADD_RANGE(R300_VAP_UNKNOWN_2288, 1);
ADD_RANGE(R300_VAP_OUTPUT_VTX_FMT_0, 2);
ADD_RANGE(R300_VAP_PVS_CNTL_1, 3);
ADD_RANGE(R300_GB_ENABLE, 1);
ADD_RANGE(R300_GB_MSPOS0, 5);
- ADD_RANGE(R300_TX_CNTL, 1);
+ ADD_RANGE(R300_TX_INVALTAGS, 1);
ADD_RANGE(R300_TX_ENABLE, 1);
ADD_RANGE(0x4200, 4);
ADD_RANGE(0x4214, 1);
@@ -388,15 +400,28 @@
if (sz * 16 > cmdbuf->bufsz)
return -EINVAL;
- BEGIN_RING(5 + sz * 4);
- /* Wait for VAP to come to senses.. */
- /* there is no need to emit it multiple times, (only once before VAP is programmed,
- but this optimization is for later */
- OUT_RING_REG(R300_VAP_PVS_WAITIDLE, 0);
+ /* VAP is very sensitive so we purge cache before we program it
+ * and we also flush its state before & after */
+ BEGIN_RING(6);
+ OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
+ OUT_RING(R300_RB3D_DC_FLUSH);
+ OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
+ OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
+ OUT_RING(CP_PACKET0(R300_VAP_PVS_STATE_FLUSH_REG, 0));
+ OUT_RING(0);
+ ADVANCE_RING();
+ /* set flush flag */
+ dev_priv->track_flush |= RADEON_FLUSH_EMITED;
+
+ BEGIN_RING(3 + sz * 4);
OUT_RING_REG(R300_VAP_PVS_UPLOAD_ADDRESS, addr);
OUT_RING(CP_PACKET0_TABLE(R300_VAP_PVS_UPLOAD_DATA, sz * 4 - 1));
OUT_RING_TABLE((int *)cmdbuf->buf, sz * 4);
+ ADVANCE_RING();
+ BEGIN_RING(2);
+ OUT_RING(CP_PACKET0(R300_VAP_PVS_STATE_FLUSH_REG, 0));
+ OUT_RING(0);
ADVANCE_RING();
cmdbuf->buf += sz * 16;
@@ -424,6 +449,15 @@
OUT_RING_TABLE((int *)cmdbuf->buf, 8);
ADVANCE_RING();
+ BEGIN_RING(4);
+ OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
+ OUT_RING(R300_RB3D_DC_FLUSH);
+ OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
+ OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
+ ADVANCE_RING();
+ /* set flush flag */
+ dev_priv->track_flush |= RADEON_FLUSH_EMITED;
+
cmdbuf->buf += 8 * 4;
cmdbuf->bufsz -= 8 * 4;
@@ -613,11 +647,19 @@
case RADEON_CNTL_BITBLT_MULTI:
return r300_emit_bitblt_multi(dev_priv, cmdbuf);
- case RADEON_CP_INDX_BUFFER: /* DRAW_INDX_2 without INDX_BUFFER seems to lock up the gpu */
+ case RADEON_CP_INDX_BUFFER:
+ /* DRAW_INDX_2 without INDX_BUFFER seems to lock up the gpu */
return r300_emit_indx_buffer(dev_priv, cmdbuf);
- case RADEON_CP_3D_DRAW_IMMD_2: /* triggers drawing using in-packet vertex data */
- case RADEON_CP_3D_DRAW_VBUF_2: /* triggers drawing of vertex buffers setup elsewhere */
- case RADEON_CP_3D_DRAW_INDX_2: /* triggers drawing using indices to vertex buffer */
+ case RADEON_CP_3D_DRAW_IMMD_2:
+ /* triggers drawing using in-packet vertex data */
+ case RADEON_CP_3D_DRAW_VBUF_2:
+ /* triggers drawing of vertex buffers setup elsewhere */
+ case RADEON_CP_3D_DRAW_INDX_2:
+ /* triggers drawing using indices to vertex buffer */
+ /* whenever we send vertex we clear flush & purge */
+ dev_priv->track_flush &= ~(RADEON_FLUSH_EMITED |
+ RADEON_PURGE_EMITED);
+ break;
case RADEON_WAIT_FOR_IDLE:
case RADEON_CP_NOP:
/* these packets are safe */
@@ -713,17 +755,53 @@
*/
static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv)
{
+ uint32_t cache_z, cache_3d, cache_2d;
RING_LOCALS;
+
+ cache_z = R300_ZC_FLUSH;
+ cache_2d = R300_RB2D_DC_FLUSH;
+ cache_3d = R300_RB3D_DC_FLUSH;
+ if (!(dev_priv->track_flush & RADEON_PURGE_EMITED)) {
+ /* we can purge, primitive where draw since last purge */
+ cache_z |= R300_ZC_FREE;
+ cache_2d |= R300_RB2D_DC_FREE;
+ cache_3d |= R300_RB3D_DC_FREE;
+ }
- BEGIN_RING(6);
- OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
- OUT_RING(R300_RB3D_DSTCACHE_UNKNOWN_0A);
+ /* flush & purge zbuffer */
+ BEGIN_RING(2);
OUT_RING(CP_PACKET0(R300_ZB_ZCACHE_CTLSTAT, 0));
- OUT_RING(R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE|
- R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
- OUT_RING(CP_PACKET3(RADEON_CP_NOP, 0));
- OUT_RING(0x0);
+ OUT_RING(cache_z);
ADVANCE_RING();
+ /* flush & purge 3d */
+ BEGIN_RING(2);
+ OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
+ OUT_RING(cache_3d);
+ ADVANCE_RING();
+ /* flush & purge texture */
+ BEGIN_RING(2);
+ OUT_RING(CP_PACKET0(R300_TX_INVALTAGS, 0));
+ OUT_RING(0);
+ ADVANCE_RING();
+ /* FIXME: is this one really needed ? */
+ BEGIN_RING(2);
+ OUT_RING(CP_PACKET0(R300_RB3D_AARESOLVE_CTL, 0));
+ OUT_RING(0);
+ ADVANCE_RING();
+ BEGIN_RING(2);
+ OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
+ OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
+ ADVANCE_RING();
+ /* flush & purge 2d through E2 as RB2D will trigger lockup */
+ BEGIN_RING(4);
+ OUT_RING(CP_PACKET0(R300_DSTCACHE_CTLSTAT, 0));
+ OUT_RING(cache_2d);
+ OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
+ OUT_RING(RADEON_WAIT_2D_IDLECLEAN |
+ RADEON_WAIT_HOST_IDLECLEAN);
+ ADVANCE_RING();
+ /* set flush & purge flags */
+ dev_priv->track_flush |= RADEON_FLUSH_EMITED | RADEON_PURGE_EMITED;
}
/**
@@ -905,8 +983,7 @@
DRM_DEBUG("\n");
- /* See the comment above r300_emit_begin3d for why this call must be here,
- * and what the cleanup gotos are for. */
+ /* pacify */
r300_pacify(dev_priv);
if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) {
diff --git a/drivers/gpu/drm/radeon/r300_reg.h b/drivers/gpu/drm/radeon/r300_reg.h
index a6802f2..ee6f811 100644
--- a/drivers/gpu/drm/radeon/r300_reg.h
+++ b/drivers/gpu/drm/radeon/r300_reg.h
@@ -317,7 +317,7 @@
* Therefore, I suspect writing zero to 0x2284 synchronizes the engine and
* avoids bugs caused by still running shaders reading bad data from memory.
*/
-#define R300_VAP_PVS_WAITIDLE 0x2284 /* GUESS */
+#define R300_VAP_PVS_STATE_FLUSH_REG 0x2284
/* Absolutely no clue what this register is about. */
#define R300_VAP_UNKNOWN_2288 0x2288
@@ -513,7 +513,7 @@
/* gap */
/* Zero to flush caches. */
-#define R300_TX_CNTL 0x4100
+#define R300_TX_INVALTAGS 0x4100
#define R300_TX_FLUSH 0x0
/* The upper enable bits are guessed, based on fglrx reported limits. */
@@ -1362,6 +1362,7 @@
#define R300_RB3D_COLORPITCH2 0x4E40 /* GUESS */
#define R300_RB3D_COLORPITCH3 0x4E44 /* GUESS */
+#define R300_RB3D_AARESOLVE_CTL 0x4E88
/* gap */
/* Guess by Vladimir.
diff --git a/drivers/gpu/drm/radeon/radeon_cp.c b/drivers/gpu/drm/radeon/radeon_cp.c
index f0de81a..3331f88 100644
--- a/drivers/gpu/drm/radeon/radeon_cp.c
+++ b/drivers/gpu/drm/radeon/radeon_cp.c
@@ -40,6 +40,7 @@
#define RADEON_FIFO_DEBUG 0
static int radeon_do_cleanup_cp(struct drm_device * dev);
+static void radeon_do_cp_start(drm_radeon_private_t * dev_priv);
static u32 R500_READ_MCIND(drm_radeon_private_t *dev_priv, int addr)
{
@@ -198,23 +199,8 @@
DRM_UDELAY(1);
}
} else {
- /* 3D */
- tmp = RADEON_READ(R300_RB3D_DSTCACHE_CTLSTAT);
- tmp |= RADEON_RB3D_DC_FLUSH_ALL;
- RADEON_WRITE(R300_RB3D_DSTCACHE_CTLSTAT, tmp);
-
- /* 2D */
- tmp = RADEON_READ(R300_DSTCACHE_CTLSTAT);
- tmp |= RADEON_RB3D_DC_FLUSH_ALL;
- RADEON_WRITE(R300_DSTCACHE_CTLSTAT, tmp);
-
- for (i = 0; i < dev_priv->usec_timeout; i++) {
- if (!(RADEON_READ(R300_DSTCACHE_CTLSTAT)
- & RADEON_RB3D_DC_BUSY)) {
- return 0;
- }
- DRM_UDELAY(1);
- }
+ /* don't flush or purge cache here or lockup */
+ return 0;
}
#if RADEON_FIFO_DEBUG
@@ -237,6 +223,9 @@
return 0;
DRM_UDELAY(1);
}
+ DRM_INFO("wait for fifo failed status : 0x%08X 0x%08X\n",
+ RADEON_READ(RADEON_RBBM_STATUS),
+ RADEON_READ(R300_VAP_CNTL_STATUS));
#if RADEON_FIFO_DEBUG
DRM_ERROR("failed!\n");
@@ -263,6 +252,9 @@
}
DRM_UDELAY(1);
}
+ DRM_INFO("wait idle failed status : 0x%08X 0x%08X\n",
+ RADEON_READ(RADEON_RBBM_STATUS),
+ RADEON_READ(R300_VAP_CNTL_STATUS));
#if RADEON_FIFO_DEBUG
DRM_ERROR("failed!\n");
@@ -443,14 +435,20 @@
dev_priv->cp_running = 1;
- BEGIN_RING(6);
-
+ BEGIN_RING(8);
+ /* isync can only be written through cp on r5xx write it here */
+ OUT_RING(CP_PACKET0(RADEON_ISYNC_CNTL, 0));
+ OUT_RING(RADEON_ISYNC_ANY2D_IDLE3D |
+ RADEON_ISYNC_ANY3D_IDLE2D |
+ RADEON_ISYNC_WAIT_IDLEGUI |
+ RADEON_ISYNC_CPSCRATCH_IDLEGUI);
RADEON_PURGE_CACHE();
RADEON_PURGE_ZCACHE();
RADEON_WAIT_UNTIL_IDLE();
-
ADVANCE_RING();
COMMIT_RING();
+
+ dev_priv->track_flush |= RADEON_FLUSH_EMITED | RADEON_PURGE_EMITED;
}
/* Reset the Command Processor. This will not flush any pending
diff --git a/drivers/gpu/drm/radeon/radeon_drv.h b/drivers/gpu/drm/radeon/radeon_drv.h
index 3f0eca9..0993816 100644
--- a/drivers/gpu/drm/radeon/radeon_drv.h
+++ b/drivers/gpu/drm/radeon/radeon_drv.h
@@ -220,6 +220,9 @@
struct drm_file *file_priv;
};
+#define RADEON_FLUSH_EMITED (1 < 0)
+#define RADEON_PURGE_EMITED (1 < 1)
+
typedef struct drm_radeon_private {
drm_radeon_ring_buffer_t ring;
drm_radeon_sarea_t *sarea_priv;
@@ -311,6 +314,7 @@
unsigned long fb_aper_offset;
int num_gb_pipes;
+ int track_flush;
} drm_radeon_private_t;
typedef struct drm_radeon_buf_priv {
@@ -693,7 +697,6 @@
#define R300_ZB_ZCACHE_CTLSTAT 0x4f18
# define R300_ZC_FLUSH (1 << 0)
# define R300_ZC_FREE (1 << 1)
-# define R300_ZC_FLUSH_ALL 0x3
# define R300_ZC_BUSY (1 << 31)
#define RADEON_RB3D_DSTCACHE_CTLSTAT 0x325c
# define RADEON_RB3D_DC_FLUSH (3 << 0)
@@ -701,6 +704,8 @@
# define RADEON_RB3D_DC_FLUSH_ALL 0xf
# define RADEON_RB3D_DC_BUSY (1 << 31)
#define R300_RB3D_DSTCACHE_CTLSTAT 0x4e4c
+# define R300_RB3D_DC_FLUSH (2 << 0)
+# define R300_RB3D_DC_FREE (2 << 2)
# define R300_RB3D_DC_FINISH (1 << 4)
#define RADEON_RB3D_ZSTENCILCNTL 0x1c2c
# define RADEON_Z_TEST_MASK (7 << 4)
@@ -1246,17 +1251,17 @@
OUT_RING(RADEON_RB3D_DC_FLUSH); \
} else { \
OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); \
- OUT_RING(RADEON_RB3D_DC_FLUSH); \
+ OUT_RING(R300_RB3D_DC_FLUSH); \
} \
} while (0)
#define RADEON_PURGE_CACHE() do { \
if ((dev_priv->flags & RADEON_FAMILY_MASK) <= CHIP_RV280) { \
OUT_RING(CP_PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0)); \
- OUT_RING(RADEON_RB3D_DC_FLUSH_ALL); \
+ OUT_RING(RADEON_RB3D_DC_FLUSH | RADEON_RB3D_DC_FREE); \
} else { \
OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); \
- OUT_RING(RADEON_RB3D_DC_FLUSH_ALL); \
+ OUT_RING(R300_RB3D_DC_FLUSH | R300_RB3D_DC_FREE); \
} \
} while (0)
@@ -1273,10 +1278,10 @@
#define RADEON_PURGE_ZCACHE() do { \
if ((dev_priv->flags & RADEON_FAMILY_MASK) <= CHIP_RV280) { \
OUT_RING(CP_PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0)); \
- OUT_RING(RADEON_RB3D_ZC_FLUSH_ALL); \
+ OUT_RING(RADEON_RB3D_ZC_FLUSH | RADEON_RB3D_ZC_FREE); \
} else { \
- OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); \
- OUT_RING(R300_ZC_FLUSH_ALL); \
+ OUT_RING(CP_PACKET0(R300_ZB_ZCACHE_CTLSTAT, 0)); \
+ OUT_RING(R300_ZC_FLUSH | R300_ZC_FREE); \
} \
} while (0)