Merge "msm: kgsl: Implement KGSL fault tolerance policy in the dispatcher"
diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c
index 184dd982..3d1a3ae 100644
--- a/drivers/gpu/msm/adreno.c
+++ b/drivers/gpu/msm/adreno.c
@@ -114,18 +114,6 @@
 	.long_ib_detect = 1,
 };
 
-/* This set of registers are used for Hang detection
- * If the values of these registers are same after
- * KGSL_TIMEOUT_PART time, GPU hang is reported in
- * kernel log.
- * *****ALERT******ALERT********ALERT*************
- * Order of registers below is important, registers
- * from LONG_IB_DETECT_REG_INDEX_START to
- * LONG_IB_DETECT_REG_INDEX_END are used in long ib detection.
- */
-#define LONG_IB_DETECT_REG_INDEX_START 1
-#define LONG_IB_DETECT_REG_INDEX_END 5
-
 unsigned int ft_detect_regs[FT_DETECT_REGS_COUNT];
 
 /*
@@ -1722,6 +1710,9 @@
 				(device->pwrctrl.gpu_cx &&
 				regulator_is_enabled(device->pwrctrl.gpu_cx)));
 
+	/* Clear any GPU faults that might have been left over */
+	adreno_set_gpu_fault(adreno_dev, 0);
+
 	/* Power up the device */
 	kgsl_pwrctrl_enable(device);
 
@@ -1830,29 +1821,35 @@
  */
 int adreno_reset(struct kgsl_device *device)
 {
-	int ret;
+	int ret = 0;
 
 	/* Try soft reset first */
-	if (adreno_soft_reset(device) == 0)
-		return 0;
+	if (adreno_soft_reset(device) != 0) {
+		KGSL_DEV_ERR_ONCE(device, "Device soft reset failed\n");
 
-	/* If it failed, then pull the power */
-	ret = adreno_stop(device);
-	if (ret)
-		return ret;
+		/* If it failed, then pull the power */
+		ret = adreno_stop(device);
+		if (ret)
+			return ret;
 
-	ret = adreno_start(device);
+		ret = adreno_start(device);
 
-	if (ret == 0) {
-		/*
-		 * If active_cnt is non-zero then the system was active before
-		 * going into a reset - put it back in that state
-		 */
-
-		if (atomic_read(&device->active_cnt))
-			kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE);
+		if (ret)
+			return ret;
 	}
 
+	/*
+	 * If active_cnt is non-zero then the system was active before
+	 * going into a reset - put it back in that state
+	 */
+
+	if (atomic_read(&device->active_cnt))
+		kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE);
+
+	/* Set the page table back to the default page table */
+	kgsl_mmu_setstate(&device->mmu, device->mmu.defaultpagetable,
+			KGSL_MEMSTORE_GLOBAL);
+
 	return ret;
 }
 
@@ -2310,6 +2307,13 @@
 	/* Stop the ringbuffer */
 	adreno_ringbuffer_stop(&adreno_dev->ringbuffer);
 
+	if (kgsl_pwrctrl_isenabled(device))
+		device->ftbl->irqctrl(device, 0);
+
+	kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF);
+
+	adreno_set_gpu_fault(adreno_dev, 0);
+
 	/* Delete the idle timer */
 	del_timer_sync(&device->idle_timer);
 
@@ -2391,12 +2395,20 @@
 			0x110, 0x110);
 
 	while (time_before(jiffies, wait)) {
+		/*
+		 * If we fault, stop waiting and return an error. The dispatcher
+		 * will clean up the fault from the work queue, but we need to
+		 * make sure we don't block it by waiting for an idle that
+		 * will never come.
+		 */
+
+		if (adreno_gpu_fault(adreno_dev) != 0)
+			return -EDEADLK;
+
 		if (adreno_isidle(device))
 			return 0;
 	}
 
-	kgsl_postmortem_dump(device, 0);
-
 	return -ETIMEDOUT;
 }
 
diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h
index 32e43b2..d070e29 100644
--- a/drivers/gpu/msm/adreno.h
+++ b/drivers/gpu/msm/adreno.h
@@ -35,12 +35,11 @@
 #define ADRENO_CHIPID_PATCH(_id) ((_id) & 0xFF)
 
 /* Flags to control command packet settings */
-#define KGSL_CMD_FLAGS_NONE             0x00000000
-#define KGSL_CMD_FLAGS_PMODE		0x00000001
-#define KGSL_CMD_FLAGS_INTERNAL_ISSUE	0x00000002
-#define KGSL_CMD_FLAGS_GET_INT		0x00000004
-#define KGSL_CMD_FLAGS_PROFILE		0x00000008
-#define KGSL_CMD_FLAGS_EOF	        0x00000100
+#define KGSL_CMD_FLAGS_NONE             0
+#define KGSL_CMD_FLAGS_PMODE		BIT(0)
+#define KGSL_CMD_FLAGS_INTERNAL_ISSUE   BIT(1)
+#define KGSL_CMD_FLAGS_WFI              BIT(2)
+#define KGSL_CMD_FLAGS_PROFILE		BIT(3)
 
 /* Command identifiers */
 #define KGSL_CONTEXT_TO_MEM_IDENTIFIER	0x2EADBEEF
@@ -96,6 +95,10 @@
 	TRACE_BUS_CTL,
 };
 
+#define ADRENO_SOFT_FAULT 1
+#define ADRENO_HARD_FAULT 2
+#define ADRENO_TIMEOUT_FAULT 3
+
 /*
  * Maximum size of the dispatcher ringbuffer - the actual inflight size will be
 * smaller than this but this size will allow for a larger range of inflight
@@ -110,7 +113,7 @@
  * @state: Current state of the dispatcher (active or paused)
  * @timer: Timer to monitor the progress of the command batches
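+ * @fault_timer: Timer to periodically check for faults while commands are
+ * inflight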
  * @inflight: Number of command batch operations pending in the ringbuffer
- * @fault: True if a HW fault was detected
+ * @fault: Non-zero if a fault was detected.
  * @pending: Priority list of contexts waiting to submit command batches
  * @plist_lock: Spin lock to protect the pending queue
 * @cmdqueue: Queue of command batches currently in flight
@@ -125,8 +128,9 @@
 	struct mutex mutex;
 	unsigned int state;
 	struct timer_list timer;
+	struct timer_list fault_timer;
 	unsigned int inflight;
-	int fault;
+	atomic_t fault;
 	struct plist_head pending;
 	spinlock_t plist_lock;
 	struct kgsl_cmdbatch *cmdqueue[ADRENO_DISPATCH_CMDQUEUE_SIZE];
@@ -340,13 +344,16 @@
 };
 
 /* Fault Tolerance policy flags */
-#define  KGSL_FT_OFF                      BIT(0)
-#define  KGSL_FT_REPLAY                   BIT(1)
-#define  KGSL_FT_SKIPIB                   BIT(2)
-#define  KGSL_FT_SKIPFRAME                BIT(3)
-#define  KGSL_FT_DISABLE                  BIT(4)
-#define  KGSL_FT_TEMP_DISABLE             BIT(5)
-#define  KGSL_FT_DEFAULT_POLICY           (KGSL_FT_REPLAY + KGSL_FT_SKIPIB)
+#define  KGSL_FT_OFF                      0
+#define  KGSL_FT_REPLAY                   1
+#define  KGSL_FT_SKIPIB                   2
+#define  KGSL_FT_SKIPFRAME                3
+#define  KGSL_FT_DISABLE                  4
+#define  KGSL_FT_TEMP_DISABLE             5
+#define  KGSL_FT_DEFAULT_POLICY (BIT(KGSL_FT_REPLAY) + BIT(KGSL_FT_SKIPIB))
+
+/* This internal bit is used to skip the PM dump on replayed command batches */
+#define  KGSL_FT_SKIP_PMDUMP              31
 
 /* Pagefault policy flags */
 #define KGSL_FT_PAGEFAULT_INT_ENABLE         BIT(0)
@@ -356,6 +363,14 @@
 #define KGSL_FT_PAGEFAULT_DEFAULT_POLICY     (KGSL_FT_PAGEFAULT_INT_ENABLE + \
 					KGSL_FT_PAGEFAULT_GPUHALT_ENABLE)
 
+#define ADRENO_FT_TYPES \
+	{ BIT(KGSL_FT_OFF), "off" }, \
+	{ BIT(KGSL_FT_REPLAY), "replay" }, \
+	{ BIT(KGSL_FT_SKIPIB), "skipib" }, \
+	{ BIT(KGSL_FT_SKIPFRAME), "skipframe" }, \
+	{ BIT(KGSL_FT_DISABLE), "disable" }, \
+	{ BIT(KGSL_FT_TEMP_DISABLE), "temp" }
+
 extern struct adreno_gpudev adreno_a2xx_gpudev;
 extern struct adreno_gpudev adreno_a3xx_gpudev;
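
Note that the KGSL_FT_* values above are now bit indices rather than pre-shifted masks, so they can be used directly with the kernel bitops (set_bit(), test_bit(), test_and_clear_bit()) on the unsigned long fault_policy word; composite values such as KGSL_FT_DEFAULT_POLICY are rebuilt with BIT(). This also leaves room for the internal KGSL_FT_SKIP_PMDUMP bit at position 31, well away from the user-visible policy bits. A minimal userspace sketch of the convention (the bitops are simplified stand-ins, not the kernel implementations):

#include <stdio.h>

#define BIT(n) (1UL << (n))

/* Bit indices, mirroring the new-style KGSL_FT_* defines */
#define FT_REPLAY	1
#define FT_SKIPIB	2
#define FT_SKIP_PMDUMP	31

/* The composite default policy is rebuilt from the indices with BIT() */
#define FT_DEFAULT_POLICY (BIT(FT_REPLAY) | BIT(FT_SKIPIB))

/* Simplified, non-atomic stand-ins for the kernel bitops */
static int test_bit(int nr, const unsigned long *word)
{
	return (*word >> nr) & 1UL;
}

static void set_bit(int nr, unsigned long *word)
{
	*word |= BIT(nr);
}

int main(void)
{
	unsigned long fault_policy = FT_DEFAULT_POLICY;

	/* Indices work directly with the bitops... */
	printf("replay enabled: %d\n", test_bit(FT_REPLAY, &fault_policy));

	/* ...and the private bit 31 can be set without clashing */
	set_bit(FT_SKIP_PMDUMP, &fault_policy);
	printf("policy word: %#lx\n", fault_policy);
	return 0;
}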
 
@@ -741,4 +756,31 @@
 		return ADRENO_REG_REGISTER_MAX;
 	return adreno_dev->gpudev->reg_offsets->offsets[offset_name];
 }
+
+/**
+ * adreno_gpu_fault() - Return the current fault state of the GPU
+ * @adreno_dev: A pointer to the adreno_device to query
+ *
+ * Return 0 if there is no fault, or a positive value indicating the type of
+ * the last fault that occurred
+ */
+static inline unsigned int adreno_gpu_fault(struct adreno_device *adreno_dev)
+{
+	smp_rmb();
+	return atomic_read(&adreno_dev->dispatcher.fault);
+}
+
+/**
+ * adreno_set_gpu_fault() - Set the current fault status of the GPU
+ * @adreno_dev: A pointer to the adreno_device to set
+ * @state: fault state to set
+ */
+static inline void adreno_set_gpu_fault(struct adreno_device *adreno_dev,
+	int state)
+{
+	atomic_set(&adreno_dev->dispatcher.fault, state);
+	smp_wmb();
+}
+
 #endif /*__ADRENO_H */
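
adreno_set_gpu_fault() pairs atomic_set() with smp_wmb(), and adreno_gpu_fault() pairs smp_rmb() with atomic_read(), so anything the IRQ or timer path stored before flagging the fault is visible to the dispatcher by the time it observes the flag. A userspace model of the same publish/consume pattern, using C11 release/acquire ordering as a stand-in for the explicit barriers (a sketch, not the kernel code):

#include <stdatomic.h>
#include <stdio.h>

#define ADRENO_HARD_FAULT 2

/* Models dispatcher.fault: the writer publishes with a release store
 * (the analogue of atomic_set + smp_wmb) and the reader consumes with
 * an acquire load (the analogue of smp_rmb + atomic_read).
 */
static atomic_int fault;

static void set_gpu_fault(int state)
{
	atomic_store_explicit(&fault, state, memory_order_release);
}

static int gpu_fault(void)
{
	return atomic_load_explicit(&fault, memory_order_acquire);
}

int main(void)
{
	set_gpu_fault(ADRENO_HARD_FAULT);
	printf("fault = %d\n", gpu_fault());
	return 0;
}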
diff --git a/drivers/gpu/msm/adreno_a3xx.c b/drivers/gpu/msm/adreno_a3xx.c
index 8b75c4e..c4f81fa 100644
--- a/drivers/gpu/msm/adreno_a3xx.c
+++ b/drivers/gpu/msm/adreno_a3xx.c
@@ -2596,7 +2596,7 @@
 
 		/* Clear the error */
 		kgsl_regwrite(device, A3XX_RBBM_AHB_CMD, (1 << 3));
-		return;
+		goto done;
 	}
 	case A3XX_INT_RBBM_REG_TIMEOUT:
 		err = "RBBM: AHB register timeout";
@@ -2637,10 +2637,15 @@
 	case A3XX_INT_UCHE_OOB_ACCESS:
 		err = "UCHE:  Out of bounds access";
 		break;
+	default:
+		return;
 	}
-
 	KGSL_DRV_CRIT(device, "%s\n", err);
 	kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF);
+
+done:
+	/* Trigger a fault in the dispatcher - this will effect a restart */
+	adreno_dispatcher_irq_fault(device);
 }
 
 static void a3xx_cp_callback(struct adreno_device *adreno_dev, int irq)
diff --git a/drivers/gpu/msm/adreno_dispatch.c b/drivers/gpu/msm/adreno_dispatch.c
index e429934..4f1d7ec 100644
--- a/drivers/gpu/msm/adreno_dispatch.c
+++ b/drivers/gpu/msm/adreno_dispatch.c
@@ -41,6 +41,71 @@
 /* Command batch timeout (in milliseconds) */
 static unsigned int _cmdbatch_timeout = 2000;
 
+/* Interval for reading and comparing fault detection registers */
+static unsigned int _fault_timer_interval = 50;
+
+/* Local array for the current set of fault detect registers */
+static unsigned int fault_detect_regs[FT_DETECT_REGS_COUNT];
+
+/* The last retired global timestamp read during fault detect */
+static unsigned int fault_detect_ts;
+
+/**
+ * fault_detect_read() - Read the set of fault detect registers
+ * @device: Pointer to the KGSL device struct
+ *
+ * Read the set of fault detect registers and store them in the local array.
+ * This is for the initial values that are compared later with
+ * fault_detect_read_compare
+ */
+static void fault_detect_read(struct kgsl_device *device)
+{
+	int i;
+
+	fault_detect_ts = kgsl_readtimestamp(device, NULL,
+		KGSL_TIMESTAMP_RETIRED);
+
+	for (i = 0; i < FT_DETECT_REGS_COUNT; i++) {
+		if (ft_detect_regs[i] == 0)
+			continue;
+		kgsl_regread(device, ft_detect_regs[i],
+			&fault_detect_regs[i]);
+	}
+}
+
+/**
+ * fault_detect_read_compare() - Read the fault detect registers and compare
+ * them to the stored values
+ * @device: Pointer to the KGSL device struct
+ *
+ * Read the set of fault detect registers and compare them to the stored set
+ * of values.  Return 1 if any of the register values changed
+ */
+static int fault_detect_read_compare(struct kgsl_device *device)
+{
+	int i, ret = 0;
+	unsigned int ts;
+
+	for (i = 0; i < FT_DETECT_REGS_COUNT; i++) {
+		unsigned int val;
+
+		if (ft_detect_regs[i] == 0)
+			continue;
+		kgsl_regread(device, ft_detect_regs[i], &val);
+		if (val != fault_detect_regs[i])
+			ret = 1;
+		fault_detect_regs[i] = val;
+	}
+
+	ts = kgsl_readtimestamp(device, NULL, KGSL_TIMESTAMP_RETIRED);
+	if (ts != fault_detect_ts)
+		ret = 1;
+
+	fault_detect_ts = ts;
+
+	return ret;
+}
+
 /**
  * adreno_dispatcher_get_cmdbatch() - Get a new command from a context queue
  * @drawctxt: Pointer to the adreno draw context
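
Together the two helpers above implement the fast-hang heuristic: take a baseline of the fault detect registers when work is first submitted, then re-read them on every fault-timer tick; if neither the registers nor the retired global timestamp moved between ticks, the GPU is presumed stuck and a soft fault is raised. A compact userspace model of the compare step (register values invented for illustration):

#include <stdio.h>
#include <string.h>

#define NREGS 4

/* Previous snapshot of the watched registers */
static unsigned int snapshot[NREGS];

/* Return 1 if anything moved since the last call, 0 otherwise */
static int read_compare(const unsigned int *current)
{
	int i, changed = 0;

	for (i = 0; i < NREGS; i++) {
		if (current[i] != snapshot[i])
			changed = 1;
		snapshot[i] = current[i];
	}

	return changed;
}

int main(void)
{
	unsigned int tick1[NREGS] = { 1, 2, 3, 4 };
	unsigned int tick2[NREGS] = { 1, 2, 3, 4 };	/* no progress */

	memcpy(snapshot, tick1, sizeof(snapshot));

	if (!read_compare(tick2))
		printf("registers static - possible soft fault\n");
	return 0;
}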
@@ -162,9 +227,17 @@
 
 	ret = adreno_ringbuffer_submitcmd(adreno_dev, cmdbatch);
 
-	/* Turn the GPU back off on failure.  Sad face. */
-	if (ret && dispatcher->inflight == 1)
-		kgsl_active_count_put(device);
+	/*
+	 * On the first command, if the submission was successful, then read the
+	 * fault registers.  If it failed then turn off the GPU. Sad face.
+	 */
+
+	if (dispatcher->inflight == 1) {
+		if (ret == 0)
+			fault_detect_read(device);
+		else
+			kgsl_active_count_put(device);
+	}
 
 	mutex_unlock(&device->mutex);
 
@@ -191,6 +264,12 @@
 		cmdbatch->expires = jiffies +
 			msecs_to_jiffies(_cmdbatch_timeout);
 		mod_timer(&dispatcher->timer, cmdbatch->expires);
+
+		/* Start the fault detection timer */
+		if (adreno_dev->fast_hang_detect)
+			mod_timer(&dispatcher->fault_timer,
+				jiffies +
+				msecs_to_jiffies(_fault_timer_interval));
 	}
 
 	return 0;
@@ -343,130 +422,6 @@
 }
 
 /**
- * adreno_dispatcher_replay() - Replay commands from the dispatcher queue
- * @adreno_dev: Pointer to the adreno device struct
- *
- * Replay the commands from the dispatcher inflight queue.  This is called after
- * a power down/up to recover from a fault
- */
-int adreno_dispatcher_replay(struct adreno_device *adreno_dev)
-{
-	struct kgsl_device *device = &adreno_dev->dev;
-	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
-	struct kgsl_cmdbatch **replay;
-	int i, ptr, count = 0;
-
-	BUG_ON(!mutex_is_locked(&dispatcher->mutex));
-
-	replay = kzalloc(sizeof(*replay) * dispatcher->inflight, GFP_KERNEL);
-
-	/*
-	 * If we can't allocate enough memory for the replay commands then we
-	 * are in a bad way.  Invalidate everything, reset the GPU and see ya
-	 * later alligator
-	 */
-
-	if (replay == NULL) {
-
-		ptr = dispatcher->head;
-
-		while (ptr != dispatcher->tail) {
-			struct kgsl_context *context =
-				dispatcher->cmdqueue[ptr]->context;
-
-			adreno_drawctxt_invalidate(device, context);
-			ptr = CMDQUEUE_NEXT(ptr, ADRENO_DISPATCH_CMDQUEUE_SIZE);
-		}
-
-		/* Reset the dispatcher queue */
-		dispatcher->inflight = 0;
-		dispatcher->head = dispatcher->tail = 0;
-
-		/* Reset the hardware */
-		mutex_lock(&device->mutex);
-
-		/*
-		 * If adreno_reset fails then the GPU is not alive and there
-		 * isn't anything we can do to recover at this point
-		 */
-
-		BUG_ON(adreno_reset(device));
-		mutex_unlock(&device->mutex);
-
-		return 0;
-	}
-
-	ptr = dispatcher->head;
-
-	while (ptr != dispatcher->tail) {
-		struct kgsl_cmdbatch *cmdbatch = dispatcher->cmdqueue[ptr];
-		struct adreno_context *drawctxt =
-			ADRENO_CONTEXT(cmdbatch->context);
-
-		if (cmdbatch->invalid)
-			adreno_drawctxt_invalidate(device, cmdbatch->context);
-
-		if (!kgsl_context_detached(cmdbatch->context) &&
-			drawctxt->state == ADRENO_CONTEXT_STATE_ACTIVE) {
-			/*
-			 * The context for the command batch is still valid -
-			 * add it to the replay list
-			 */
-			replay[count++] = dispatcher->cmdqueue[ptr];
-		} else {
-			/*
-			 * Skip over invaliated or detached contexts - cancel
-			 * any pending events for the timestamp and destroy the
-			 * command batch
-			 */
-			mutex_lock(&device->mutex);
-			kgsl_cancel_events_timestamp(device, cmdbatch->context,
-				cmdbatch->timestamp);
-			mutex_unlock(&device->mutex);
-
-			kgsl_cmdbatch_destroy(cmdbatch);
-		}
-
-		ptr = CMDQUEUE_NEXT(ptr, ADRENO_DISPATCH_CMDQUEUE_SIZE);
-	}
-
-	/* Reset the dispatcher queue */
-	dispatcher->inflight = 0;
-	dispatcher->head = dispatcher->tail = 0;
-
-	mutex_lock(&device->mutex);
-	BUG_ON(adreno_reset(device));
-	mutex_unlock(&device->mutex);
-
-	/* Replay the pending command buffers */
-	for (i = 0; i < count; i++) {
-		int ret = sendcmd(adreno_dev, replay[i]);
-
-		/*
-		 * I'm afraid that if we get an error during replay we
-		 * are not going to space today
-		 */
-
-		BUG_ON(ret);
-	}
-
-	/*
-	 * active_count will be set when we come into this function because
-	 * there were inflight commands.  By virtue of setting ->inflight back
-	 * to 0 sendcmd() will increase the active count again on the first
-	 * submission.  This active_count_put is needed to put the universe back
-	 * in balance and as a bonus it ensures that the hardware stays up for
-	 * the entire reset process
-	 */
-	mutex_lock(&device->mutex);
-	kgsl_active_count_put(device);
-	mutex_unlock(&device->mutex);
-
-	kfree(replay);
-	return 0;
-}
-
-/**
  * adreno_dispatcher_queue_cmd() - Queue a new command in the context
  * @adreno_dev: Pointer to the adreno device struct
  * @drawctxt: Pointer to the adreno draw context
@@ -489,6 +444,42 @@
 		return -EINVAL;
 	}
 
+	/*
+	 * After skipping to the end of the frame we need to force the preamble
+	 * to run (if it exists) regardless of the context state.
+	 */
+
+	if (drawctxt->flags & CTXT_FLAGS_FORCE_PREAMBLE) {
+		set_bit(CMDBATCH_FLAG_FORCE_PREAMBLE, &cmdbatch->priv);
+		drawctxt->flags &= ~CTXT_FLAGS_FORCE_PREAMBLE;
+	}
+
+	/*
+	 * If we are waiting for the end of frame and it hasn't appeared yet,
+	 * then mark the command batch as skipped.  It will still progress
+	 * through the pipeline but it won't actually send any commands
+	 */
+
+	if (drawctxt->flags & CTXT_FLAGS_SKIP_EOF) {
+		set_bit(CMDBATCH_FLAG_SKIP, &cmdbatch->priv);
+
+		/*
+		 * If this command batch represents the EOF then clear the way
+		 * for the dispatcher to continue submitting
+		 */
+
+		if (cmdbatch->flags & KGSL_CONTEXT_END_OF_FRAME) {
+			drawctxt->flags &= ~CTXT_FLAGS_SKIP_EOF;
+
+			/*
+			 * Force the preamble on the next command to ensure that
+			 * the state is correct
+			 */
+
+			drawctxt->flags |= CTXT_FLAGS_FORCE_PREAMBLE;
+		}
+	}
+
 	/* Wait for room in the context queue */
 
 	while (drawctxt->queued >= _context_cmdqueue_size) {
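
The two flag checks above form a small per-context state machine: once fault handling sets CTXT_FLAGS_SKIP_EOF, every batch queued on that context is marked CMDBATCH_FLAG_SKIP until one arrives carrying KGSL_CONTEXT_END_OF_FRAME; that batch closes the skip window and schedules a forced preamble so the next real submission starts from known state. A userspace model of the transitions (flag names shortened for the sketch):

#include <stdio.h>

#define CTXT_SKIP_EOF		0x1
#define CTXT_FORCE_PREAMBLE	0x2
#define BATCH_EOF		0x1

struct batch {
	int flags;		/* BATCH_EOF if it ends a frame */
	int skipped;
};

static void queue_batch(unsigned int *ctxt_flags, struct batch *b)
{
	if (*ctxt_flags & CTXT_SKIP_EOF) {
		b->skipped = 1;

		/* The EOF batch closes the window and forces a preamble */
		if (b->flags & BATCH_EOF) {
			*ctxt_flags &= ~CTXT_SKIP_EOF;
			*ctxt_flags |= CTXT_FORCE_PREAMBLE;
		}
	}
}

int main(void)
{
	unsigned int ctxt = CTXT_SKIP_EOF;
	struct batch mid = { 0, 0 }, eof = { BATCH_EOF, 0 }, next = { 0, 0 };

	queue_batch(&ctxt, &mid);	/* skipped */
	queue_batch(&ctxt, &eof);	/* skipped, ends the skip window */
	queue_batch(&ctxt, &next);	/* runs, with a forced preamble */

	printf("mid=%d eof=%d next=%d ctxt=%#x\n",
		mid.skipped, eof.skipped, next.skipped, ctxt);
	return 0;
}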
@@ -536,6 +527,16 @@
 	cmdbatch->timestamp = drawctxt->timestamp;
 	*timestamp = drawctxt->timestamp;
 
+	/*
+	 * Set the fault tolerance policy for the command batch - assuming the
+	 * context hasn't disabled FT, use the current device policy
+	 */
+
+	if (drawctxt->flags & CTXT_FLAGS_NO_FAULT_TOLERANCE)
+		set_bit(KGSL_FT_DISABLE, &cmdbatch->fault_policy);
+	else
+		cmdbatch->fault_policy = adreno_dev->ft_policy;
+
 	/* Put the command into the queue */
 	drawctxt->cmdqueue[drawctxt->cmdqueue_tail] = cmdbatch;
 	drawctxt->cmdqueue_tail = (drawctxt->cmdqueue_tail + 1) %
@@ -565,58 +566,442 @@
 	return 0;
 }
 
-/**
- * dispatcher_do_fault() - Handle a GPU fault and reset the GPU
- * @device: Pointer to the KGSL device
- * @cmdbatch: Pointer to the command batch believed to be responsible for the
- * fault
- * @invalidate: Non zero if the current command should be invalidated
- *
- * Trigger a fault in the dispatcher and start the replay process
+/*
+ * If an IB inside of the command batch has a gpuaddr that matches the base
+ * passed in, then zero the size, which effectively skips it when it is
+ * submitted in the ringbuffer.
  */
-static void dispatcher_do_fault(struct kgsl_device *device,
-		struct kgsl_cmdbatch *cmdbatch, int invalidate)
+static void cmdbatch_skip_ib(struct kgsl_cmdbatch *cmdbatch, unsigned int base)
+{
+	int i;
+
+	for (i = 0; i < cmdbatch->ibcount; i++) {
+		if (cmdbatch->ibdesc[i].gpuaddr == base) {
+			cmdbatch->ibdesc[i].sizedwords = 0;
+			return;
+		}
+	}
+}
+
+static void cmdbatch_skip_frame(struct kgsl_cmdbatch *cmdbatch,
+	struct kgsl_cmdbatch **replay, int count)
+{
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(cmdbatch->context);
+	int skip = 1;
+	int i;
+
+	for (i = 0; i < count; i++) {
+
+		/*
+		 * Only operate on command batches that belong to the
+		 * faulting context
+		 */
+
+		if (replay[i]->context->id != cmdbatch->context->id)
+			continue;
+
+		/*
+		 * Skip all the command batches in this context until
+		 * the EOF flag is seen.  If the EOF flag is seen then
+		 * force the preamble for the next command.
+		 */
+
+		if (skip) {
+			set_bit(CMDBATCH_FLAG_SKIP, &replay[i]->priv);
+
+			if (replay[i]->flags & KGSL_CONTEXT_END_OF_FRAME)
+				skip = 0;
+		} else {
+			set_bit(CMDBATCH_FLAG_FORCE_PREAMBLE, &replay[i]->priv);
+			return;
+		}
+	}
+
+	/*
+	 * If the EOF flag hasn't been seen yet then set the flag in the
+	 * drawctxt to keep looking for it
+	 */
+
+	if (skip && drawctxt)
+		drawctxt->flags |= CTXT_FLAGS_SKIP_EOF;
+
+	/*
+	 * If we did see the EOF flag then force the preamble on for the
+	 * next command issued on this context
+	 */
+
+	if (!skip && drawctxt)
+		drawctxt->flags |= CTXT_FLAGS_FORCE_PREAMBLE;
+}
+
+static void remove_invalidated_cmdbatches(struct kgsl_device *device,
+		struct kgsl_cmdbatch **replay, int count)
+{
+	int i;
+
+	for (i = 0; i < count; i++) {
+		struct kgsl_cmdbatch *cmd = replay[i];
+		struct adreno_context *drawctxt;
+
+		if (cmd == NULL)
+			continue;
+
+		drawctxt = ADRENO_CONTEXT(cmd->context);
+
+		if (kgsl_context_detached(cmd->context) ||
+			drawctxt->state == ADRENO_CONTEXT_STATE_INVALID) {
+			replay[i] = NULL;
+
+			mutex_lock(&device->mutex);
+			kgsl_cancel_events_timestamp(device, cmd->context,
+				cmd->timestamp);
+			mutex_unlock(&device->mutex);
+
+			kgsl_cmdbatch_destroy(cmd);
+		}
+	}
+}
+
+static char _pidname[TASK_COMM_LEN];
+
+static inline const char *_kgsl_context_comm(struct kgsl_context *context)
+{
+	struct task_struct *task = NULL;
+
+	if (context)
+		task = find_task_by_vpid(context->pid);
+
+	if (task)
+		get_task_comm(_pidname, task);
+	else
+		snprintf(_pidname, TASK_COMM_LEN, "unknown");
+
+	return _pidname;
+}
+
+#define pr_fault(_d, _c, fmt, args...) \
+		dev_err((_d)->dev, "%s[%d]: " fmt, \
+		_kgsl_context_comm((_c)->context), \
+		(_c)->context->pid, ##args)
+
+
+static void adreno_fault_header(struct kgsl_device *device,
+	struct kgsl_cmdbatch *cmdbatch)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	unsigned int status, base, rptr, wptr, ib1base, ib2base, ib1sz, ib2sz;
+
+	kgsl_regread(device,
+			adreno_getreg(adreno_dev, ADRENO_REG_RBBM_STATUS),
+			&status);
+	kgsl_regread(device,
+		adreno_getreg(adreno_dev, ADRENO_REG_CP_RB_BASE),
+		&base);
+	kgsl_regread(device,
+		adreno_getreg(adreno_dev, ADRENO_REG_CP_RB_RPTR),
+		&rptr);
+	kgsl_regread(device,
+		adreno_getreg(adreno_dev, ADRENO_REG_CP_RB_WPTR),
+		&wptr);
+	kgsl_regread(device,
+		adreno_getreg(adreno_dev, ADRENO_REG_CP_IB1_BASE),
+		&ib1base);
+	kgsl_regread(device,
+		adreno_getreg(adreno_dev, ADRENO_REG_CP_IB1_BUFSZ),
+		&ib1sz);
+	kgsl_regread(device,
+		adreno_getreg(adreno_dev, ADRENO_REG_CP_IB2_BASE),
+		&ib2base);
+	kgsl_regread(device,
+		adreno_getreg(adreno_dev, ADRENO_REG_CP_IB2_BUFSZ),
+		&ib2sz);
+
+	trace_adreno_gpu_fault(cmdbatch->context->id, cmdbatch->timestamp,
+		status, rptr, wptr, ib1base, ib1sz, ib2base, ib2sz);
+
+	pr_fault(device, cmdbatch,
+		"gpu fault ctx %d ts %d status %8.8X rb %4.4x/%4.4x ib1 %8.8x/%4.4x ib2 %8.8x/%4.4x\n",
+		cmdbatch->context->id, cmdbatch->timestamp, status,
+		rptr, wptr, ib1base, ib1sz, ib2base, ib2sz);
+}
+
+static int dispatcher_do_fault(struct kgsl_device *device)
 {
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
 	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
-	unsigned int reg;
+	unsigned int ptr;
+	unsigned int reg, base;
+	struct kgsl_cmdbatch **replay = NULL;
+	struct kgsl_cmdbatch *cmdbatch;
+	int ret, i, count = 0;
+	int fault, first = 0;
+	bool pagefault = false;
+	BUG_ON(dispatcher->inflight == 0);
 
-	/* Stop the timers */
+	fault = atomic_xchg(&dispatcher->fault, 0);
+	if (fault == 0)
+		return 0;
+
+	/* Turn off all the timers */
 	del_timer_sync(&dispatcher->timer);
+	del_timer_sync(&dispatcher->fault_timer);
 
 	mutex_lock(&device->mutex);
 
-	/*
-	 * There is an interesting race condition here - when a command batch
-	 * expires and we invaliate before we recover we run the risk of having
-	 * the UMD clean up the context and free memory that the GPU is still
-	 * using.  Not that it is dangerous because we are a few microseconds
-	 * away from resetting, but it still ends up in pagefaults and log
-	 * messages and so on and so forth. To avoid this we mark the command
-	 * batch itself as invalid and then reset - the context will get
-	 * invalidated in the replay.
-	 */
+	cmdbatch = dispatcher->cmdqueue[dispatcher->head];
 
-	if (invalidate)
-		cmdbatch->invalid = 1;
+	trace_adreno_cmdbatch_fault(cmdbatch, fault);
 
 	/*
-	 * Stop the CP in its tracks - this ensures that we don't get activity
-	 * while we are trying to dump the state of the system
+	 * If the fault was due to a timeout then stop the CP to ensure we don't
+	 * get activity while we are trying to dump the state of the system
 	 */
 
+	if (fault == ADRENO_TIMEOUT_FAULT) {
+		adreno_readreg(adreno_dev, ADRENO_REG_CP_ME_CNTL, &reg);
+		reg |= (1 << 27) | (1 << 28);
+		adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_CNTL, reg);
 
-	adreno_readreg(adreno_dev, ADRENO_REG_CP_ME_CNTL, &reg);
-	reg |= (1 << 27) | (1 << 28);
-	adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_CNTL, reg);
+		/* Skip the PM dump for a timeout because it confuses people */
+		set_bit(KGSL_FT_SKIP_PMDUMP, &cmdbatch->fault_policy);
+	}
 
-	kgsl_postmortem_dump(device, 0);
-	kgsl_device_snapshot(device, 1);
+	adreno_readreg(adreno_dev, ADRENO_REG_CP_IB1_BASE, &base);
+
+	/*
+	 * Dump the postmortem and snapshot information if this is the first
+	 * detected fault for the oldest active command batch
+	 */
+
+	if (!test_bit(KGSL_FT_SKIP_PMDUMP, &cmdbatch->fault_policy)) {
+		adreno_fault_header(device, cmdbatch);
+
+		if (device->pm_dump_enable)
+			kgsl_postmortem_dump(device, 0);
+
+		kgsl_device_snapshot(device, 1);
+	}
+
 	mutex_unlock(&device->mutex);
 
-	/* If we can't replay then bravely run away and die */
-	if (adreno_dispatcher_replay(adreno_dev))
-		BUG();
+	/* Allocate memory to store the inflight commands */
+	replay = kzalloc(sizeof(*replay) * dispatcher->inflight, GFP_KERNEL);
+
+	if (replay == NULL) {
+		unsigned int ptr = dispatcher->head;
+
+		while (ptr != dispatcher->tail) {
+			struct kgsl_context *context =
+				dispatcher->cmdqueue[ptr]->context;
+
+			adreno_drawctxt_invalidate(device, context);
+			kgsl_cmdbatch_destroy(dispatcher->cmdqueue[ptr]);
+
+			ptr = CMDQUEUE_NEXT(ptr, ADRENO_DISPATCH_CMDQUEUE_SIZE);
+		}
+
+		/*
+		 * Set the replay count to zero - this will ensure that the
+		 * hardware gets reset but nothing else gets replayed
+		 */
+
+		count = 0;
+		goto replay;
+	}
+
+	/* Copy the inflight command batches into the temporary storage */
+	ptr = dispatcher->head;
+
+	while (ptr != dispatcher->tail) {
+		replay[count++] = dispatcher->cmdqueue[ptr];
+		ptr = CMDQUEUE_NEXT(ptr, ADRENO_DISPATCH_CMDQUEUE_SIZE);
+	}
+
+	/*
+	 * For the purposes of replay, we assume that the oldest command batch
+	 * that hasn't retired a timestamp is "hung".
+	 */
+
+	cmdbatch = replay[0];
+
+	/*
+	 * If FT is disabled for this cmdbatch, invalidate it immediately
+	 */
+
+	if (test_bit(KGSL_FT_DISABLE, &cmdbatch->fault_policy) ||
+		test_bit(KGSL_FT_TEMP_DISABLE, &cmdbatch->fault_policy)) {
+		pr_fault(device, cmdbatch, "gpu skipped ctx %d ts %d\n",
+			cmdbatch->context->id, cmdbatch->timestamp);
+
+		adreno_drawctxt_invalidate(device, cmdbatch->context);
+	}
+
+	/*
+	 * Set a flag so we don't print another PM dump if the cmdbatch fails
+	 * again on replay
+	 */
+
+	set_bit(KGSL_FT_SKIP_PMDUMP, &cmdbatch->fault_policy);
+
+	/*
+	 * A hardware fault generally means something was deterministically
+	 * wrong with the command batch - no point in trying to replay it.
+	 * Clear the replay bit and move on to the next policy level.
+	 */
+
+	if (fault == ADRENO_HARD_FAULT)
+		clear_bit(KGSL_FT_REPLAY, &(cmdbatch->fault_policy));
+
+	/*
+	 * A timeout fault means the IB timed out - clear the policy and
+	 * invalidate - this will clear the FT_SKIP_PMDUMP bit but that is okay
+	 * because we won't see this cmdbatch again
+	 */
+
+	if (fault == ADRENO_TIMEOUT_FAULT)
+		bitmap_zero(&cmdbatch->fault_policy, BITS_PER_LONG);
+
+	/*
+	 * If the context had a GPU page fault then it is likely it would fault
+	 * again if replayed
+	 */
+
+	if (test_bit(KGSL_CONTEXT_PAGEFAULT, &cmdbatch->context->priv)) {
+		/* we'll need to resume the mmu later... */
+		pagefault = true;
+		clear_bit(KGSL_FT_REPLAY, &cmdbatch->fault_policy);
+		clear_bit(KGSL_CONTEXT_PAGEFAULT, &cmdbatch->context->priv);
+	}
+
+	/*
+	 * Execute the fault tolerance policy. Each command batch stores the
+	 * current fault policy that was set when it was queued.
+	 * As the options are tried in descending priority
+	 * (REPLAY -> SKIPIBS -> SKIPFRAME -> NOTHING) the bits are cleared
+	 * from the cmdbatch policy so the next thing can be tried if the
+	 * fault comes around again
+	 */
+
+	/* Replay the hanging command batch again */
+	if (test_and_clear_bit(KGSL_FT_REPLAY, &cmdbatch->fault_policy)) {
+		trace_adreno_cmdbatch_recovery(cmdbatch, BIT(KGSL_FT_REPLAY));
+		set_bit(KGSL_FT_REPLAY, &cmdbatch->fault_recovery);
+		goto replay;
+	}
+
+	/*
+	 * Skip the last IB1 that was played but replay everything else.
+	 * Note that the last IB1 might not be in the "hung" command batch
+	 * because the CP may have caused a page-fault while it was prefetching
+	 * the next IB1/IB2. Walk all outstanding commands and zap the
+	 * supposedly bad IB1 wherever it lurks.
+	 */
+
+	if (test_and_clear_bit(KGSL_FT_SKIPIB, &cmdbatch->fault_policy)) {
+		trace_adreno_cmdbatch_recovery(cmdbatch, BIT(KGSL_FT_SKIPIB));
+		set_bit(KGSL_FT_SKIPIB, &cmdbatch->fault_recovery);
+
+		for (i = 0; i < count; i++) {
+			if (replay[i] != NULL)
+				cmdbatch_skip_ib(replay[i], base);
+		}
+
+		goto replay;
+	}
+
+	if (test_and_clear_bit(KGSL_FT_SKIPFRAME, &cmdbatch->fault_policy)) {
+		trace_adreno_cmdbatch_recovery(cmdbatch,
+			BIT(KGSL_FT_SKIPFRAME));
+		set_bit(KGSL_FT_SKIPFRAME, &cmdbatch->fault_recovery);
+
+		/*
+		 * Skip all the pending command batches for this context until
+		 * the EOF frame is seen
+		 */
+		cmdbatch_skip_frame(cmdbatch, replay, count);
+		goto replay;
+	}
+
+	/* If we get here then all the policies failed */
+
+	pr_fault(device, cmdbatch, "gpu failed ctx %d ts %d\n",
+		cmdbatch->context->id, cmdbatch->timestamp);
+
+	/* Invalidate the context */
+	adreno_drawctxt_invalidate(device, cmdbatch->context);
+
+
+replay:
+	/* Reset the dispatcher queue */
+	dispatcher->inflight = 0;
+	dispatcher->head = dispatcher->tail = 0;
+
+	/* Reset the GPU */
+	mutex_lock(&device->mutex);
+
+	/* resume the MMU if it is stalled */
+	if (pagefault && device->mmu.mmu_ops->mmu_pagefault_resume != NULL)
+		device->mmu.mmu_ops->mmu_pagefault_resume(&device->mmu);
+
+	ret = adreno_reset(device);
+	mutex_unlock(&device->mutex);
+
+	/* If adreno_reset() fails then what hope do we have for the future? */
+	BUG_ON(ret);
+
+	/* Remove any pending command batches that have been invalidated */
+	remove_invalidated_cmdbatches(device, replay, count);
+
+	/* Replay the pending command buffers */
+	for (i = 0; i < count; i++) {
+
+		int ret;
+
+		if (replay[i] == NULL)
+			continue;
+
+		/*
+		 * Force the preamble on the first command (if applicable) to
+		 * avoid any strange stage issues
+		 */
+
+		if (first == 0) {
+			set_bit(CMDBATCH_FLAG_FORCE_PREAMBLE, &replay[i]->priv);
+			first = 1;
+		}
+
+		/*
+		 * Force each command batch to wait for idle - this avoids weird
+		 * CP parse issues
+		 */
+
+		set_bit(CMDBATCH_FLAG_WFI, &replay[i]->priv);
+
+		ret = sendcmd(adreno_dev, replay[i]);
+
+		/*
+		 * If sending the command fails, then try to recover by
+		 * invalidating the context
+		 */
+
+		if (ret) {
+			pr_fault(device, replay[i],
+				"gpu reset failed ctx %d ts %d\n",
+				replay[i]->context->id, replay[i]->timestamp);
+
+			adreno_drawctxt_invalidate(device, replay[i]->context);
+			remove_invalidated_cmdbatches(device, &replay[i],
+				count - i);
+		}
+	}
+
+	mutex_lock(&device->mutex);
+	kgsl_active_count_put(device);
+	mutex_unlock(&device->mutex);
+
+	kfree(replay);
+
+	return 1;
 }
 
 static inline int cmdbatch_consumed(struct kgsl_cmdbatch *cmdbatch,
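
dispatcher_do_fault() consumes the per-batch policy word in descending priority: each option is removed with test_and_clear_bit() as it is tried, so a repeat fault on the same batch falls through to the next, harsher action and ends in context invalidation once the word is empty. A userspace walk of that cascade under the default policy (names shortened; a hard fault would additionally strip the replay bit up front):

#include <stdio.h>

#define BIT(n) (1UL << (n))

enum { FT_OFF, FT_REPLAY, FT_SKIPIB, FT_SKIPFRAME };

/* Simplified stand-in for the kernel's test_and_clear_bit() */
static int test_and_clear(int nr, unsigned long *word)
{
	int was_set = (*word >> nr) & 1UL;

	*word &= ~BIT(nr);
	return was_set;
}

static const char *next_action(unsigned long *policy)
{
	if (test_and_clear(FT_REPLAY, policy))
		return "replay";
	if (test_and_clear(FT_SKIPIB, policy))
		return "skip ib";
	if (test_and_clear(FT_SKIPFRAME, policy))
		return "skip frame";
	return "invalidate context";
}

int main(void)
{
	unsigned long policy = BIT(FT_REPLAY) | BIT(FT_SKIPIB);
	int i;

	/* Each successive fault on the same batch escalates the response */
	for (i = 0; i < 3; i++)
		printf("fault %d -> %s\n", i, next_action(&policy));
	return 0;
}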
@@ -626,6 +1011,30 @@
 		(timestamp_cmp(retired, cmdbatch->timestamp) < 0));
 }
 
+static void _print_recovery(struct kgsl_device *device,
+		struct kgsl_cmdbatch *cmdbatch)
+{
+	static struct {
+		unsigned int mask;
+		const char *str;
+	} flags[] = { ADRENO_FT_TYPES };
+
+	int i, nr = find_first_bit(&cmdbatch->fault_recovery, BITS_PER_LONG);
+	const char *result = "unknown";
+
+	for (i = 0; i < ARRAY_SIZE(flags); i++) {
+		if (flags[i].mask == BIT(nr)) {
+			result = flags[i].str;
+			break;
+		}
+	}
+
+	pr_fault(device, cmdbatch,
+		"gpu %s ctx %d ts %d policy %lX\n",
+		result, cmdbatch->context->id, cmdbatch->timestamp,
+		cmdbatch->fault_recovery);
+}
+
 /**
  * adreno_dispatcher_work() - Master work handler for the dispatcher
  * @work: Pointer to the work struct for the current work queue
@@ -639,7 +1048,7 @@
 	struct adreno_device *adreno_dev =
 		container_of(dispatcher, struct adreno_device, dispatcher);
 	struct kgsl_device *device = &adreno_dev->dev;
-	int inv, count = 0;
+	int count = 0;
 
 	mutex_lock(&dispatcher->mutex);
 
@@ -667,6 +1076,14 @@
 		if (kgsl_context_detached(cmdbatch->context) ||
 			(timestamp_cmp(cmdbatch->timestamp, retired) <= 0)) {
 
+			/*
+			 * If the cmdbatch in question had faulted, announce its
+			 * successful completion to the world
+			 */
+
+			if (cmdbatch->fault_recovery != 0)
+				_print_recovery(device, cmdbatch);
+
 			trace_adreno_cmdbatch_retired(cmdbatch,
 				dispatcher->inflight - 1);
 
@@ -693,8 +1110,6 @@
 			}
 
 			count++;
-
-			BUG_ON(dispatcher->inflight == 0 && dispatcher->fault);
 			continue;
 		}
 
@@ -703,17 +1118,23 @@
 		 * is to blame.  Invalidate it, reset and replay
 		 */
 
-		if (dispatcher->fault) {
-			dispatcher_do_fault(device, cmdbatch, 1);
+		if (dispatcher_do_fault(device))
 			goto done;
-		}
 
 		/* Get the last consumed timestamp */
 		consumed = kgsl_readtimestamp(device, cmdbatch->context,
 			KGSL_TIMESTAMP_CONSUMED);
 
-		/* Break here if fault detection is disabled for the context */
-		if (drawctxt->flags & CTXT_FLAGS_NO_FAULT_TOLERANCE)
+		/*
+		 * Break here if fault detection is disabled for the context or
+		 * if the long running IB detection is disabled device wide.
+		 * Long running command buffers will be allowed to run to
+		 * completion - but badly behaving command buffers (infinite
+		 * shaders etc) can end up running forever.
+		 */
+
+		if (!adreno_dev->long_ib_detect ||
+			drawctxt->flags & CTXT_FLAGS_NO_FAULT_TOLERANCE)
 			break;
 
 		/*
@@ -727,23 +1148,13 @@
 
 		/* Boom goes the dynamite */
 
-		pr_err("-----------------------\n");
+		pr_fault(device, cmdbatch,
+			"gpu timeout ctx %d ts %d\n",
+			cmdbatch->context->id, cmdbatch->timestamp);
 
-		pr_err("dispatcher: expired ctx=%d ts=%d consumed=%d retired=%d\n",
-			cmdbatch->context->id, cmdbatch->timestamp, consumed,
-			retired);
-		pr_err("dispatcher: jiffies=%lu expired=%lu\n", jiffies,
-				cmdbatch->expires);
+		adreno_set_gpu_fault(adreno_dev, ADRENO_TIMEOUT_FAULT);
 
-		/*
-		 * If execution stopped after the current command batch was
-		 * consumed then invalidate the context for the current command
-		 * batch
-		 */
-
-		inv = cmdbatch_consumed(cmdbatch, consumed, retired);
-
-		dispatcher_do_fault(device, cmdbatch, inv);
+		dispatcher_do_fault(device);
 		break;
 	}
 
@@ -768,9 +1179,12 @@
 		struct kgsl_cmdbatch *cmdbatch
 			= dispatcher->cmdqueue[dispatcher->head];
 
+		/* Update the timeout timer for the next command batch */
 		mod_timer(&dispatcher->timer, cmdbatch->expires);
-	} else
+	} else {
 		del_timer_sync(&dispatcher->timer);
+		del_timer_sync(&dispatcher->fault_timer);
+	}
 
 	/* Before leaving update the pwrscale information */
 	mutex_lock(&device->mutex);
@@ -788,6 +1202,60 @@
 	queue_work(device->work_queue, &dispatcher->work);
 }
 
+/**
+ * adreno_dispatcher_queue_context() - schedule a drawctxt in the dispatcher
+ * @device: pointer to the KGSL device
+ * @drawctxt: pointer to the drawctxt to schedule
+ *
+ * Put a draw context on the dispatcher pending queue and schedule the
+ * dispatcher. This is used to reschedule contexts that might have been blocked
+ * for sync points or other concerns
+ */
+void adreno_dispatcher_queue_context(struct kgsl_device *device,
+	struct adreno_context *drawctxt)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	dispatcher_queue_context(adreno_dev, drawctxt);
+	adreno_dispatcher_schedule(device);
+}
+
+/*
+ * This is called on a regular basis while command batches are inflight.  Fault
+ * detection registers are read and compared to the existing values - if they
+ * changed then the GPU is still running.  If they are the same between
+ * subsequent calls then the GPU may have faulted
+ */
+
+void adreno_dispatcher_fault_timer(unsigned long data)
+{
+	struct adreno_device *adreno_dev = (struct adreno_device *) data;
+	struct kgsl_device *device = &adreno_dev->dev;
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+
+	/* Leave if the user decided to turn off fast hang detection */
+	if (adreno_dev->fast_hang_detect == 0)
+		return;
+
+	if (adreno_gpu_fault(adreno_dev)) {
+		adreno_dispatcher_schedule(device);
+		return;
+	}
+
+	/*
+	 * Read the fault registers - if the compare returns 0 then they haven't
+	 * changed, so mark the dispatcher as faulted and schedule the work loop.
+	 */
+
+	if (!fault_detect_read_compare(device)) {
+		adreno_set_gpu_fault(adreno_dev, ADRENO_SOFT_FAULT);
+		adreno_dispatcher_schedule(device);
+	} else {
+		mod_timer(&dispatcher->fault_timer,
+			jiffies + msecs_to_jiffies(_fault_timer_interval));
+	}
+}
+
 /*
  * This is called when the timer expires - it either means the GPU is hung or
  * the IB is taking too long to execute
@@ -800,18 +1268,15 @@
 	adreno_dispatcher_schedule(device);
 }
 /**
- * adreno_dispatcher_fault_irq() - Trigger a fault in the dispatcher
+ * adreno_dispatcher_irq_fault() - Trigger a fault in the dispatcher
  * @device: Pointer to the KGSL device
  *
  * Called from an interrupt context this will trigger a fault in the
- * dispatcher
+ * dispatcher for the oldest pending command batch
  */
-void adreno_dispatcher_fault_irq(struct kgsl_device *device)
+void adreno_dispatcher_irq_fault(struct kgsl_device *device)
 {
-	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
-	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
-
-	dispatcher->fault = 1;
+	adreno_set_gpu_fault(ADRENO_DEVICE(device), ADRENO_HARD_FAULT);
 	adreno_dispatcher_schedule(device);
 }
 
@@ -863,6 +1328,8 @@
 	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
 
 	del_timer_sync(&dispatcher->timer);
+	del_timer_sync(&dispatcher->fault_timer);
+
 	dispatcher->state = ADRENO_DISPATCHER_PAUSE;
 }
 
@@ -878,6 +1345,7 @@
 
 	mutex_lock(&dispatcher->mutex);
 	del_timer_sync(&dispatcher->timer);
+	del_timer_sync(&dispatcher->fault_timer);
 
 	while (dispatcher->head != dispatcher->tail) {
 		kgsl_cmdbatch_destroy(dispatcher->cmdqueue[dispatcher->head]);
@@ -953,6 +1421,8 @@
 	_context_cmdbatch_burst);
 static DISPATCHER_UINT_ATTR(cmdbatch_timeout, 0644, 0, _cmdbatch_timeout);
 static DISPATCHER_UINT_ATTR(context_queue_wait, 0644, 0, _context_queue_wait);
+static DISPATCHER_UINT_ATTR(fault_detect_interval, 0644, 0,
+	_fault_timer_interval);
 
 static struct attribute *dispatcher_attrs[] = {
 	&dispatcher_attr_inflight.attr,
@@ -960,6 +1430,7 @@
 	&dispatcher_attr_context_burst_count.attr,
 	&dispatcher_attr_cmdbatch_timeout.attr,
 	&dispatcher_attr_context_queue_wait.attr,
+	&dispatcher_attr_fault_detect_interval.attr,
 	NULL,
 };
 
@@ -1024,6 +1495,9 @@
 	setup_timer(&dispatcher->timer, adreno_dispatcher_timer,
 		(unsigned long) adreno_dev);
 
+	setup_timer(&dispatcher->fault_timer, adreno_dispatcher_fault_timer,
+		(unsigned long) adreno_dev);
+
 	INIT_WORK(&dispatcher->work, adreno_dispatcher_work);
 
 	plist_head_init(&dispatcher->pending);
diff --git a/drivers/gpu/msm/adreno_drawctxt.h b/drivers/gpu/msm/adreno_drawctxt.h
index f8469e2..486c70a 100644
--- a/drivers/gpu/msm/adreno_drawctxt.h
+++ b/drivers/gpu/msm/adreno_drawctxt.h
@@ -54,6 +54,8 @@
 #define CTXT_FLAGS_SKIP_EOF             BIT(15)
 /* Context no fault tolerance */
 #define CTXT_FLAGS_NO_FAULT_TOLERANCE  BIT(16)
+/* Force the preamble for the next submission */
+#define CTXT_FLAGS_FORCE_PREAMBLE      BIT(17)
 
 /* Symbolic table for the adreno draw context type */
 #define ADRENO_DRAWCTXT_TYPES \
diff --git a/drivers/gpu/msm/adreno_postmortem.c b/drivers/gpu/msm/adreno_postmortem.c
index 294ae76..8fb2830 100644
--- a/drivers/gpu/msm/adreno_postmortem.c
+++ b/drivers/gpu/msm/adreno_postmortem.c
@@ -396,8 +396,8 @@
 
 int adreno_dump(struct kgsl_device *device, int manual)
 {
-	unsigned int cp_ib1_base, cp_ib1_bufsz;
-	unsigned int cp_ib2_base, cp_ib2_bufsz;
+	unsigned int cp_ib1_base;
+	unsigned int cp_ib2_base;
 	phys_addr_t pt_base, cur_pt_base;
 	unsigned int cp_rb_base, cp_rb_ctrl, rb_count;
 	unsigned int cp_rb_wptr, cp_rb_rptr;
@@ -410,7 +410,6 @@
 	unsigned int ts_processed = 0xdeaddead;
 	struct kgsl_context *context;
 	unsigned int context_id;
-	unsigned int rbbm_status;
 
 	static struct ib_list ib_list;
 
@@ -420,16 +419,10 @@
 
 	mb();
 
-	if (device->pm_dump_enable) {
-		msm_clk_dump_debug_info();
+	msm_clk_dump_debug_info();
 
-		if (adreno_dev->gpudev->postmortem_dump)
-			adreno_dev->gpudev->postmortem_dump(adreno_dev);
-	}
-
-	kgsl_regread(device,
-			adreno_getreg(adreno_dev, ADRENO_REG_RBBM_STATUS),
-			&rbbm_status);
+	if (adreno_dev->gpudev->postmortem_dump)
+		adreno_dev->gpudev->postmortem_dump(adreno_dev);
 
 	pt_base = kgsl_mmu_get_current_ptbase(&device->mmu);
 	cur_pt_base = pt_base;
@@ -451,29 +444,8 @@
 		adreno_getreg(adreno_dev, ADRENO_REG_CP_IB1_BASE),
 		&cp_ib1_base);
 	kgsl_regread(device,
-		adreno_getreg(adreno_dev, ADRENO_REG_CP_IB1_BUFSZ),
-		&cp_ib1_bufsz);
-	kgsl_regread(device,
 		adreno_getreg(adreno_dev, ADRENO_REG_CP_IB2_BASE),
 		&cp_ib2_base);
-	kgsl_regread(device,
-		adreno_getreg(adreno_dev, ADRENO_REG_CP_IB2_BUFSZ),
-		&cp_ib2_bufsz);
-
-	trace_adreno_gpu_fault(rbbm_status, cp_rb_rptr, cp_rb_wptr,
-			cp_ib1_base, cp_ib1_bufsz, cp_ib2_base, cp_ib2_bufsz);
-
-	/* If postmortem dump is not enabled, dump minimal set and return */
-	if (!device->pm_dump_enable) {
-
-		KGSL_LOG_DUMP(device,
-			"STATUS %08X | IB1:%08X/%08X | IB2: %08X/%08X"
-			" | RPTR: %04X | WPTR: %04X\n",
-			rbbm_status,  cp_ib1_base, cp_ib1_bufsz, cp_ib2_base,
-			cp_ib2_bufsz, cp_rb_rptr, cp_rb_wptr);
-
-		return 0;
-	}
 
 	kgsl_sharedmem_readl(&device->memstore,
 			(unsigned int *) &context_id,
diff --git a/drivers/gpu/msm/adreno_ringbuffer.c b/drivers/gpu/msm/adreno_ringbuffer.c
index dc1530a..1ad90fd 100644
--- a/drivers/gpu/msm/adreno_ringbuffer.c
+++ b/drivers/gpu/msm/adreno_ringbuffer.c
@@ -606,8 +606,8 @@
 	if (adreno_is_a20x(adreno_dev))
 		total_sizedwords += 2; /* CACHE_FLUSH */
 
-	if (flags & KGSL_CMD_FLAGS_EOF)
-		total_sizedwords += 2;
+	if (flags & KGSL_CMD_FLAGS_WFI)
+		total_sizedwords += 2; /* WFI */
 
 	if (profile_ready)
 		total_sizedwords += 6;   /* space for pre_ib and post_ib */
@@ -737,6 +737,12 @@
 		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu, 0);
 	}
 
+	if (flags & KGSL_CMD_FLAGS_WFI) {
+		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu,
+			cp_type3_packet(CP_WAIT_FOR_IDLE, 1));
+		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu, 0x00000000);
+	}
+
 	adreno_ringbuffer_submit(rb);
 
 	return 0;
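
The WFI lands in the ring as a two-dword type-3 packet: a header naming CP_WAIT_FOR_IDLE plus one zero payload dword. A sketch of the PM4 type-3 header layout assumed here (packet type in bits 31:30, payload dword count minus one from bit 16, opcode in bits 15:8; the 0x26 opcode is an assumption for illustration):

#include <stdio.h>

#define CP_TYPE3_PKT		(3u << 30)
#define CP_WAIT_FOR_IDLE	0x26	/* assumed PM4 opcode */

/* Assumed type-3 header encoding used by cp_type3_packet() */
static unsigned int cp_type3_packet(unsigned int opcode, unsigned int cnt)
{
	return CP_TYPE3_PKT | ((cnt - 1) << 16) | ((opcode & 0xff) << 8);
}

int main(void)
{
	/* The WFI emitted above: header plus a single zero payload dword */
	printf("%#010x %#010x\n", cp_type3_packet(CP_WAIT_FOR_IDLE, 1), 0u);
	return 0;
}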
@@ -1024,6 +1030,7 @@
 	struct kgsl_context *context;
 	struct adreno_context *drawctxt;
 	unsigned int start_index = 0;
+	int flags = KGSL_CMD_FLAGS_NONE;
 	int ret;
 
 	context = cmdbatch->context;
@@ -1039,10 +1046,23 @@
 	commands are stored in the first node of the IB chain. We can skip that
 	if a context switch hasn't occurred */
 
-	if (drawctxt->flags & CTXT_FLAGS_PREAMBLE &&
-		adreno_dev->drawctxt_active == drawctxt)
+	if ((drawctxt->flags & CTXT_FLAGS_PREAMBLE) &&
+		!test_bit(CMDBATCH_FLAG_FORCE_PREAMBLE, &cmdbatch->priv) &&
+		(adreno_dev->drawctxt_active == drawctxt))
 		start_index = 1;
 
+	/*
+	 * In skip mode don't issue the draw IBs but keep all the other
+	 * accoutrements of a submission (including the interrupt) to keep
+	 * the accounting sane. Set start_index and numibs to 0 to just
+	 * generate the start and end markers and skip everything else
+	 */
+
+	if (test_bit(CMDBATCH_FLAG_SKIP, &cmdbatch->priv)) {
+		start_index = 0;
+		numibs = 0;
+	}
+
 	cmds = link = kzalloc(sizeof(unsigned int) * (numibs * 3 + 4),
 				GFP_KERNEL);
 	if (!link) {
@@ -1061,7 +1081,17 @@
 		*cmds++ = ibdesc[0].sizedwords;
 	}
 	for (i = start_index; i < numibs; i++) {
-		*cmds++ = CP_HDR_INDIRECT_BUFFER_PFD;
+
+		/*
+		 * Skip 0 sized IBs - these are presumed to have been removed
+		 * from consideration by the FT policy
+		 */
+
+		if (ibdesc[i].sizedwords == 0)
+			*cmds++ = cp_nop_packet(2);
+		else
+			*cmds++ = CP_HDR_INDIRECT_BUFFER_PFD;
+
 		*cmds++ = ibdesc[i].gpuaddr;
 		*cmds++ = ibdesc[i].sizedwords;
 	}
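
This is the consumer side of cmdbatch_skip_ib(): a zapped IB keeps its three-dword footprint in the stream, but its INDIRECT_BUFFER header is swapped for a NOP whose payload count makes the CP swallow the two dwords that would have described the IB, so every other offset in the command list is untouched. A sketch under assumed PM4 opcodes (CP_NOP = 0x10, CP_INDIRECT_BUFFER_PFD = 0x37):

#include <stdio.h>

#define CP_TYPE3_PKT		(3u << 30)
#define CP_NOP			0x10	/* assumed PM4 opcodes */
#define CP_INDIRECT_BUFFER_PFD	0x37

static unsigned int cp_type3_packet(unsigned int opcode, unsigned int cnt)
{
	return CP_TYPE3_PKT | ((cnt - 1) << 16) | ((opcode & 0xff) << 8);
}

int main(void)
{
	unsigned int gpuaddr = 0x1000, sizedwords = 0;	/* IB zapped by FT */
	unsigned int cmds[3];

	/* Same three-dword footprint either way: the NOP header tells the
	 * CP to skip the two dwords that would have described the IB.
	 */
	cmds[0] = sizedwords ? cp_type3_packet(CP_INDIRECT_BUFFER_PFD, 2) :
			       cp_type3_packet(CP_NOP, 2);
	cmds[1] = gpuaddr;
	cmds[2] = sizedwords;

	printf("%#010x %#010x %#010x\n", cmds[0], cmds[1], cmds[2]);
	return 0;
}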
@@ -1085,9 +1115,12 @@
 	if (ret)
 		goto done;
 
+	if (test_bit(CMDBATCH_FLAG_WFI, &cmdbatch->priv))
+		flags = KGSL_CMD_FLAGS_WFI;
+
 	ret = adreno_ringbuffer_addcmds(&adreno_dev->ringbuffer,
 					drawctxt,
-					cmdbatch->flags,
+					flags,
 					&link[0], (cmds - link),
 					cmdbatch->timestamp);
 
diff --git a/drivers/gpu/msm/adreno_trace.h b/drivers/gpu/msm/adreno_trace.h
index 59aca2e..01194f4 100644
--- a/drivers/gpu/msm/adreno_trace.h
+++ b/drivers/gpu/msm/adreno_trace.h
@@ -61,14 +61,78 @@
 	)
 );
 
-DEFINE_EVENT(adreno_cmdbatch_template, adreno_cmdbatch_retired,
+DEFINE_EVENT(adreno_cmdbatch_template, adreno_cmdbatch_submitted,
 	TP_PROTO(struct kgsl_cmdbatch *cmdbatch, int inflight),
 	TP_ARGS(cmdbatch, inflight)
 );
 
-DEFINE_EVENT(adreno_cmdbatch_template, adreno_cmdbatch_submitted,
+TRACE_EVENT(adreno_cmdbatch_retired,
 	TP_PROTO(struct kgsl_cmdbatch *cmdbatch, int inflight),
-	TP_ARGS(cmdbatch, inflight)
+	TP_ARGS(cmdbatch, inflight),
+	TP_STRUCT__entry(
+		__field(unsigned int, id)
+		__field(unsigned int, timestamp)
+		__field(unsigned int, inflight)
+		__field(unsigned int, recovery)
+	),
+	TP_fast_assign(
+		__entry->id = cmdbatch->context->id;
+		__entry->timestamp = cmdbatch->timestamp;
+		__entry->inflight = inflight;
+		__entry->recovery = cmdbatch->fault_recovery;
+	),
+	TP_printk(
+		"ctx=%u ts=%u inflight=%u recovery=%s",
+			__entry->id, __entry->timestamp,
+			__entry->inflight,
+			__entry->recovery ?
+				__print_flags(__entry->recovery, "|",
+				ADRENO_FT_TYPES) : "none"
+	)
+);
+
+TRACE_EVENT(adreno_cmdbatch_fault,
+	TP_PROTO(struct kgsl_cmdbatch *cmdbatch, unsigned int fault),
+	TP_ARGS(cmdbatch, fault),
+	TP_STRUCT__entry(
+		__field(unsigned int, id)
+		__field(unsigned int, timestamp)
+		__field(unsigned int, fault)
+	),
+	TP_fast_assign(
+		__entry->id = cmdbatch->context->id;
+		__entry->timestamp = cmdbatch->timestamp;
+		__entry->fault = fault;
+	),
+	TP_printk(
+		"ctx=%u ts=%u type=%s",
+			__entry->id, __entry->timestamp,
+			__print_symbolic(__entry->fault,
+				{ 0, "none" },
+				{ ADRENO_SOFT_FAULT, "soft" },
+				{ ADRENO_HARD_FAULT, "hard" },
+				{ ADRENO_TIMEOUT_FAULT, "timeout" })
+	)
+);
+
+TRACE_EVENT(adreno_cmdbatch_recovery,
+	TP_PROTO(struct kgsl_cmdbatch *cmdbatch, unsigned int action),
+	TP_ARGS(cmdbatch, action),
+	TP_STRUCT__entry(
+		__field(unsigned int, id)
+		__field(unsigned int, timestamp)
+		__field(unsigned int, action)
+	),
+	TP_fast_assign(
+		__entry->id = cmdbatch->context->id;
+		__entry->timestamp = cmdbatch->timestamp;
+		__entry->action = action;
+	),
+	TP_printk(
+		"ctx=%u ts=%u action=%s",
+			__entry->id, __entry->timestamp,
+			__print_symbolic(__entry->action, ADRENO_FT_TYPES)
+	)
 );
 
 DECLARE_EVENT_CLASS(adreno_drawctxt_template,
@@ -140,11 +204,15 @@
 );
 
 TRACE_EVENT(adreno_gpu_fault,
-	TP_PROTO(unsigned int status, unsigned int rptr, unsigned int wptr,
+	TP_PROTO(unsigned int ctx, unsigned int ts,
+		unsigned int status, unsigned int rptr, unsigned int wptr,
 		unsigned int ib1base, unsigned int ib1size,
 		unsigned int ib2base, unsigned int ib2size),
-	TP_ARGS(status, rptr, wptr, ib1base, ib1size, ib2base, ib2size),
+	TP_ARGS(ctx, ts, status, rptr, wptr, ib1base, ib1size, ib2base,
+		ib2size),
 	TP_STRUCT__entry(
+		__field(unsigned int, ctx)
+		__field(unsigned int, ts)
 		__field(unsigned int, status)
 		__field(unsigned int, rptr)
 		__field(unsigned int, wptr)
@@ -154,6 +222,8 @@
 		__field(unsigned int, ib2size)
 	),
 	TP_fast_assign(
+		__entry->ctx = ctx;
+		__entry->ts = ts;
 		__entry->status = status;
 		__entry->rptr = rptr;
 		__entry->wptr = wptr;
@@ -162,10 +232,10 @@
 		__entry->ib2base = ib2base;
 		__entry->ib2size = ib2size;
 	),
-	TP_printk("status=%X RB=%X/%X IB1=%X/%X IB2=%X/%X",
-		__entry->status, __entry->wptr, __entry->rptr,
-		__entry->ib1base, __entry->ib1size, __entry->ib2base,
-		__entry->ib2size)
+	TP_printk("ctx=%d ts=%d status=%X RB=%X/%X IB1=%X/%X IB2=%X/%X",
+		__entry->ctx, __entry->ts, __entry->status, __entry->wptr,
+		__entry->rptr, __entry->ib1base, __entry->ib1size,
+		__entry->ib2base, __entry->ib2size)
 );
 
 #endif /* _ADRENO_TRACE_H */
diff --git a/drivers/gpu/msm/kgsl_device.h b/drivers/gpu/msm/kgsl_device.h
index f5b27d0..f1a97ed 100644
--- a/drivers/gpu/msm/kgsl_device.h
+++ b/drivers/gpu/msm/kgsl_device.h
@@ -160,8 +160,12 @@
 /**
  * struct kgsl_cmdbatch - KGSl command descriptor
  * @context: KGSL context that created the command
- * @timestamp: Timestamp assigned to the command (currently unused)
+ * @timestamp: Timestamp assigned to the command
  * @flags: flags
+ * @priv: Internal flags
+ * @fault_policy: Internal policy describing how to handle this command in case
+ * of a fault
+ * @fault_recovery: recovery actions actually tried for this batch
  * @ibcount: Number of IBs in the command list
  * @ibdesc: Pointer to the list of IBs
  * @expires: Point in time when the cmdbatch is considered to be hung
@@ -172,12 +176,28 @@
 	struct kgsl_context *context;
 	uint32_t timestamp;
 	uint32_t flags;
+	unsigned long priv;
+	unsigned long fault_policy;
+	unsigned long fault_recovery;
 	uint32_t ibcount;
 	struct kgsl_ibdesc *ibdesc;
 	unsigned long expires;
 	int invalid;
 };
 
+/**
+ * enum kgsl_cmdbatch_priv - Internal cmdbatch flags
+ * @CMDBATCH_FLAG_SKIP: skip the entire command batch
+ * @CMDBATCH_FLAG_FORCE_PREAMBLE: Force the preamble on for the cmdbatch
+ * @CMDBATCH_FLAG_WFI: Force wait-for-idle for the submission
+ */
+
+enum kgsl_cmdbatch_priv {
+	CMDBATCH_FLAG_SKIP = 0,
+	CMDBATCH_FLAG_FORCE_PREAMBLE,
+	CMDBATCH_FLAG_WFI,
+};
+
 struct kgsl_device {
 	struct device *dev;
 	const char *name;
diff --git a/drivers/gpu/msm/kgsl_iommu.c b/drivers/gpu/msm/kgsl_iommu.c
index 103736d..e296784 100644
--- a/drivers/gpu/msm/kgsl_iommu.c
+++ b/drivers/gpu/msm/kgsl_iommu.c
@@ -423,8 +423,12 @@
 	 * the GPU and trigger a snapshot. To stall the transaction return
 	 * EBUSY error.
 	 */
-	if (adreno_dev->ft_pf_policy & KGSL_FT_PAGEFAULT_GPUHALT_ENABLE)
+	if (adreno_dev->ft_pf_policy & KGSL_FT_PAGEFAULT_GPUHALT_ENABLE) {
+		/* turn off GPU IRQ so we don't get faults from it too */
+		kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF);
+		adreno_dispatcher_irq_fault(device);
 		ret = -EBUSY;
+	}
 done:
 	return ret;
 }
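
With GPUHALT enabled the handler leaves the faulting transaction stalled on the bus (the -EBUSY return), masks the GPU IRQ, raises a hard fault, and relies on dispatcher_do_fault() to call mmu_pagefault_resume() once the state has been dumped, just before the reset. A toy model of that handshake (all names invented for the sketch):

#include <stdio.h>

static int mmu_stalled;
static int gpu_fault;

/* Fault handler side: stall the transaction and flag the dispatcher */
static int pagefault_handler(void)
{
	mmu_stalled = 1;	/* returning -EBUSY keeps the bus stalled */
	gpu_fault = 2;		/* ADRENO_HARD_FAULT */
	return -16;		/* -EBUSY */
}

/* Dispatcher side: resume the MMU before resetting and replaying */
static void dispatcher_recover(void)
{
	if (gpu_fault && mmu_stalled) {
		mmu_stalled = 0;	/* mmu_pagefault_resume() */
		gpu_fault = 0;		/* reset the GPU, replay */
	}
}

int main(void)
{
	pagefault_handler();
	dispatcher_recover();
	printf("stalled=%d fault=%d\n", mmu_stalled, gpu_fault);
	return 0;
}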
diff --git a/include/linux/msm_kgsl.h b/include/linux/msm_kgsl.h
index 87047d2..1eb346b 100644
--- a/include/linux/msm_kgsl.h
+++ b/include/linux/msm_kgsl.h
@@ -20,6 +20,8 @@
 #define KGSL_CONTEXT_TRASH_STATE	0x00000020
 #define KGSL_CONTEXT_PER_CONTEXT_TS	0x00000040
 #define KGSL_CONTEXT_USER_GENERATED_TS	0x00000080
+#define KGSL_CONTEXT_END_OF_FRAME	0x00000100
+
 #define KGSL_CONTEXT_NO_FAULT_TOLERANCE 0x00000200
 /* bits [12:15] are reserved for future use */
 #define KGSL_CONTEXT_TYPE_MASK          0x01F00000