Merge "msm: mdss: hdmi: HDMI cable connection status and vote"
diff --git a/arch/arm/boot/dts/msm8226.dtsi b/arch/arm/boot/dts/msm8226.dtsi
index 0ac4615..51a0795 100644
--- a/arch/arm/boot/dts/msm8226.dtsi
+++ b/arch/arm/boot/dts/msm8226.dtsi
@@ -183,8 +183,8 @@
 			<268000 348000>,
 			<505000 657000>;
 		qcom,buffer-type-tz-usage-table = <0x1 0x1>,
-			<0x2 0x2>,
-			<0x1f0 0x3>;
+			<0x6 0x2>,
+			<0x7C0 0x3>;
 		qcom,max-hw-load = <352800>; /* 720p @ 30 + 1080p @ 30 */
 		qcom,vidc-iommu-domains {
 			qcom,domain-ns {
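Note the mask change here: <0x1f0 0x3> (buffer-type bits 4-8) shifts up to <0x7C0 0x3> (bits 6-10), and <0x2 0x2> widens to <0x6 0x2>. As a hedged illustration of how a <mask, usage> table like this might be consulted by a driver (all names below are hypothetical, not from this patch):

/*
 * Hypothetical sketch (names not from this patch): resolve the TZ
 * usage id for a buffer type against <mask, usage> pairs like the
 * qcom,buffer-type-tz-usage-table entries above.
 */
struct tz_usage_entry {
	unsigned int mask;	/* buffer-type bitmask */
	unsigned int usage;	/* TZ usage id */
};

static unsigned int buffer_type_to_tz_usage(const struct tz_usage_entry *tbl,
					    unsigned int count,
					    unsigned int buffer_type)
{
	unsigned int i;

	for (i = 0; i < count; i++)
		if (tbl[i].mask & buffer_type)
			return tbl[i].usage;

	return 0;	/* not a secured buffer type */
}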
diff --git a/arch/arm/boot/dts/msm8610.dtsi b/arch/arm/boot/dts/msm8610.dtsi
index 62776d8..9406b75 100644
--- a/arch/arm/boot/dts/msm8610.dtsi
+++ b/arch/arm/boot/dts/msm8610.dtsi
@@ -182,7 +182,7 @@
 		compatible = "qcom,msm-vidc";
 		qcom,vidc-ns-map = <0x40000000 0x40000000>;
 		qcom,buffer-type-tz-usage-map = <0x1 0x1>,
-						<0x1fe 0x2>;
+						<0x7fe 0x2>;
 		qcom,hfi = "q6";
 		qcom,max-hw-load = <244800>; /* 1080p @ 30 * 1 */
 		qcom,vidc-iommu-domains {
diff --git a/arch/arm/boot/dts/msm8974-v1.dtsi b/arch/arm/boot/dts/msm8974-v1.dtsi
index 249c963..556e912 100644
--- a/arch/arm/boot/dts/msm8974-v1.dtsi
+++ b/arch/arm/boot/dts/msm8974-v1.dtsi
@@ -134,7 +134,7 @@
 	qcom,iommu-groups = <&venus_domain_ns &venus_domain_cp>;
 	qcom,iommu-group-buffer-types = <0xfff 0x1ff>;
 	qcom,buffer-type-tz-usage-table = <0x1 0x1>,
-					<0x1fe 0x2>;
+					<0x7fe 0x2>;
 };
 
 &sfpb_spinlock {
diff --git a/arch/arm/boot/dts/msm8974-v2.dtsi b/arch/arm/boot/dts/msm8974-v2.dtsi
index 55e18f0..0da5658 100644
--- a/arch/arm/boot/dts/msm8974-v2.dtsi
+++ b/arch/arm/boot/dts/msm8974-v2.dtsi
@@ -136,9 +136,9 @@
 		<3240000 1600000>,
 		<4048000 1600000>,
 		<4264000 1600000>;
-	qcom,buffer-type-tz-usage-table = <0x91 0x1>,
-					<0x42 0x2>,
-					<0x120 0x3>;
+	qcom,buffer-type-tz-usage-table = <0x241 0x1>,
+					<0x106 0x2>,
+					<0x480 0x3>;
 	qcom,vidc-iommu-domains {
 		qcom,domain-ns {
 			qcom,vidc-domain-phandle = <&venus_domain_ns>;
diff --git a/arch/arm/boot/dts/msm8974pro-pm8941.dtsi b/arch/arm/boot/dts/msm8974pro-pm8941.dtsi
index 0f37584..2c06c3c 100644
--- a/arch/arm/boot/dts/msm8974pro-pm8941.dtsi
+++ b/arch/arm/boot/dts/msm8974pro-pm8941.dtsi
@@ -38,18 +38,22 @@
 
 &krait0_vreg {
 		regulator-max-microvolt = <1120000>;
+		qcom,ldo-delta-voltage = <25000>;
 };
 
 &krait1_vreg {
 		regulator-max-microvolt = <1120000>;
+		qcom,ldo-delta-voltage = <25000>;
 };
 
 &krait2_vreg {
 		regulator-max-microvolt = <1120000>;
+		qcom,ldo-delta-voltage = <25000>;
 };
 
 &krait3_vreg {
 		regulator-max-microvolt = <1120000>;
+		qcom,ldo-delta-voltage = <25000>;
 };
 
 &tspp {
diff --git a/arch/arm/boot/dts/msm8974pro-pma8084-regulator.dtsi b/arch/arm/boot/dts/msm8974pro-pma8084-regulator.dtsi
index df00f8a..00e3b8b 100644
--- a/arch/arm/boot/dts/msm8974pro-pma8084-regulator.dtsi
+++ b/arch/arm/boot/dts/msm8974pro-pma8084-regulator.dtsi
@@ -494,7 +494,7 @@
 			qcom,retention-voltage = <675000>;
 			qcom,ldo-default-voltage = <750000>;
 			qcom,ldo-threshold-voltage = <850000>;
-			qcom,ldo-delta-voltage = <50000>;
+			qcom,ldo-delta-voltage = <25000>;
 			qcom,cpu-num = <0>;
 		};
 
@@ -510,7 +510,7 @@
 			qcom,retention-voltage = <675000>;
 			qcom,ldo-default-voltage = <750000>;
 			qcom,ldo-threshold-voltage = <850000>;
-			qcom,ldo-delta-voltage = <50000>;
+			qcom,ldo-delta-voltage = <25000>;
 			qcom,cpu-num = <1>;
 		};
 
@@ -526,7 +526,7 @@
 			qcom,retention-voltage = <675000>;
 			qcom,ldo-default-voltage = <750000>;
 			qcom,ldo-threshold-voltage = <850000>;
-			qcom,ldo-delta-voltage = <50000>;
+			qcom,ldo-delta-voltage = <25000>;
 			qcom,cpu-num = <2>;
 		};
 
@@ -542,7 +542,7 @@
 			qcom,retention-voltage = <675000>;
 			qcom,ldo-default-voltage = <750000>;
 			qcom,ldo-threshold-voltage = <850000>;
-			qcom,ldo-delta-voltage = <50000>;
+			qcom,ldo-delta-voltage = <25000>;
 			qcom,cpu-num = <3>;
 		};
 	};
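Each of the four Krait LDO configurations halves qcom,ldo-delta-voltage from 50 mV to 25 mV, matching the 25 mV step added to the krait regulator nodes in msm8974pro-pm8941.dtsi above. A hedged sketch of how a consumer might read and apply the step (of_property_read_u32 is the standard DT accessor; the helper name and its fallback default are assumptions):

#include <linux/of.h>

/*
 * Illustrative helper (not from this patch): snap a requested LDO
 * voltage down to a multiple of the qcom,ldo-delta-voltage step.
 */
static int krait_ldo_round_down(struct device_node *node, int uV)
{
	u32 delta_uV;

	if (of_property_read_u32(node, "qcom,ldo-delta-voltage", &delta_uV))
		delta_uV = 25000;	/* assumed default step, in uV */

	return uV - (uV % delta_uV);
}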
diff --git a/arch/arm/boot/dts/msm8974pro.dtsi b/arch/arm/boot/dts/msm8974pro.dtsi
index bd58653..6d5c862 100644
--- a/arch/arm/boot/dts/msm8974pro.dtsi
+++ b/arch/arm/boot/dts/msm8974pro.dtsi
@@ -208,9 +208,9 @@
 		<4048000 1600000>,
 		<4264000 1600000>;
 	qcom,max-hw-load = <1281600>; /* max(4k X 2304 @ 24, 4k X 2160 @ 30) + 1080p @ 30 */
-	qcom,buffer-type-tz-usage-table = <0x91 0x1>,
-					<0x42 0x2>,
-					<0x120 0x3>;
+	qcom,buffer-type-tz-usage-table = <0x241 0x1>,
+					<0x106 0x2>,
+					<0x480 0x3>;
 	qcom,vidc-iommu-domains {
 		qcom,domain-ns {
 			qcom,vidc-domain-phandle = <&venus_domain_ns>;
diff --git a/drivers/gpu/msm/Makefile b/drivers/gpu/msm/Makefile
index 118e033..aac183b 100644
--- a/drivers/gpu/msm/Makefile
+++ b/drivers/gpu/msm/Makefile
@@ -22,9 +22,11 @@
 msm_adreno-y += \
 	adreno_ringbuffer.o \
 	adreno_drawctxt.o \
+	adreno_dispatch.o \
 	adreno_postmortem.o \
 	adreno_snapshot.o \
 	adreno_coresight.o \
+	adreno_trace.o \
 	adreno_a2xx.o \
 	adreno_a2xx_trace.o \
 	adreno_a2xx_snapshot.o \
diff --git a/drivers/gpu/msm/a2xx_reg.h b/drivers/gpu/msm/a2xx_reg.h
index c70c4eb..b2fb99f 100644
--- a/drivers/gpu/msm/a2xx_reg.h
+++ b/drivers/gpu/msm/a2xx_reg.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2012, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2007-2013, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -252,7 +252,15 @@
 #define REG_CP_CSQ_IB1_STAT              0x01FE
 #define REG_CP_CSQ_IB2_STAT              0x01FF
 #define REG_CP_CSQ_RB_STAT               0x01FD
+
 #define REG_CP_DEBUG                     0x01FC
+/*
+ * CP DEBUG settings for a3xx and a2xx cores:
+ * DYNAMIC_CLK_DISABLE [27] - turn off the dynamic clock control
+ * MIU_128BIT_WRITE_ENABLE [25] - Allow 128 bit writes to the VBIF
+ */
+#define A2XX_CP_DEBUG_DEFAULT ((1 << 27) | (1 << 25))
+
 #define REG_CP_IB1_BASE                  0x0458
 #define REG_CP_IB1_BUFSZ                 0x0459
 #define REG_CP_IB2_BASE                  0x045A
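A2XX_CP_DEBUG_DEFAULT collects the two documented CP_DEBUG bits into one reusable value. A sketch of how a start path could program it (kgsl_regwrite is the KGSL register write helper; the wrapper below is illustrative, not part of this patch):

/*
 * Illustrative wrapper (not from this patch): program the CP debug
 * register with the default bits, i.e. DYNAMIC_CLK_DISABLE (bit 27)
 * and MIU_128BIT_WRITE_ENABLE (bit 25).
 */
static inline void a2xx_cp_debug_init(struct kgsl_device *device)
{
	kgsl_regwrite(device, REG_CP_DEBUG, A2XX_CP_DEBUG_DEFAULT);
}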
diff --git a/drivers/gpu/msm/a3xx_reg.h b/drivers/gpu/msm/a3xx_reg.h
index b5945da..676f46d 100644
--- a/drivers/gpu/msm/a3xx_reg.h
+++ b/drivers/gpu/msm/a3xx_reg.h
@@ -493,6 +493,9 @@
 #define RBBM_RBBM_CTL_ENABLE_PWR_CTR0  BIT(16)
 #define RBBM_RBBM_CTL_ENABLE_PWR_CTR1  BIT(17)
 
+/* Bit flag for RBBM_PERFCTR_CTL */
+#define RBBM_PERFCTR_CTL_ENABLE BIT(0)
+
 /* Various flags used by the context switch code */
 
 #define SP_MULTI 0
diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c
index f16f9e4..a271388 100644
--- a/drivers/gpu/msm/adreno.c
+++ b/drivers/gpu/msm/adreno.c
@@ -32,6 +32,7 @@
 
 #include "adreno.h"
 #include "adreno_pm4types.h"
+#include "adreno_trace.h"
 
 #include "a2xx_reg.h"
 #include "a3xx_reg.h"
@@ -39,6 +40,9 @@
 #define DRIVER_VERSION_MAJOR   3
 #define DRIVER_VERSION_MINOR   1
 
+/* Number of times to try hard reset */
+#define NUM_TIMES_RESET_RETRY 5
+
 /* Adreno MH arbiter config*/
 #define ADRENO_CFG_MHARB \
 	(0x10 \
@@ -107,7 +111,6 @@
 		.drv_log = KGSL_LOG_LEVEL_DEFAULT,
 		.mem_log = KGSL_LOG_LEVEL_DEFAULT,
 		.pwr_log = KGSL_LOG_LEVEL_DEFAULT,
-		.ft_log = KGSL_LOG_LEVEL_DEFAULT,
 		.pm_dump_enable = 0,
 	},
 	.gmem_base = 0,
@@ -122,12 +125,6 @@
 	.long_ib_detect = 1,
 };
 
-/* This set of registers are used for Hang detection
- * If the values of these registers are same after
- * KGSL_TIMEOUT_PART time, GPU hang is reported in
- * kernel log.
- */
-
 unsigned int ft_detect_regs[FT_DETECT_REGS_COUNT];
 
 /*
@@ -164,6 +161,12 @@
 	unsigned int pfp_jt_idx;
 	/* PFP jump table load addr */
 	unsigned int pfp_jt_addr;
+	/* PM4 bootstrap loader size */
+	unsigned int pm4_bstrp_size;
+	/* PFP bootstrap loader size */
+	unsigned int pfp_bstrp_size;
+	/* PFP bootstrap loader supported version */
+	unsigned int pfp_bstrp_ver;
 
 } adreno_gpulist[] = {
 	{ ADRENO_REV_A200, 0, 2, ANY_ID, ANY_ID,
@@ -201,7 +204,8 @@
 		512, 0, 2, SZ_512K, 0x3FF037, 0x3FF016 },
 	{ ADRENO_REV_A330, 3, 3, 0, ANY_ID,
 		"a330_pm4.fw", "a330_pfp.fw", &adreno_a3xx_gpudev,
-		512, 0, 2, SZ_1M, NO_VER, NO_VER, 0x8AD, 0x2E4, 0x201, 0x200 },
+		512, 0, 2, SZ_1M, NO_VER, NO_VER, 0x8AD, 0x2E4, 0x201, 0x200,
+		0x6, 0x20, 0x330020 },
 	{ ADRENO_REV_A305B, 3, 0, 5, 0x10,
 		"a330_pm4.fw", "a330_pfp.fw", &adreno_a3xx_gpudev,
 		512, 0, 2, SZ_128K, NO_VER, NO_VER, 0x8AD, 0x2E4,
@@ -216,8 +220,6 @@
 		512, 0, 2, SZ_128K, 0x3FF037, 0x3FF016 },
 };
 
-static unsigned int adreno_isidle(struct kgsl_device *device);
-
 /**
  * adreno_perfcounter_init: Reserve kernel performance counters
  * @device: device to configure
@@ -229,32 +231,47 @@
  * performance counters will remain active as long as the device is alive.
  */
 
-static void adreno_perfcounter_init(struct kgsl_device *device)
+static int adreno_perfcounter_init(struct kgsl_device *device)
 {
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
 
 	if (adreno_dev->gpudev->perfcounter_init)
-		adreno_dev->gpudev->perfcounter_init(adreno_dev);
+		return adreno_dev->gpudev->perfcounter_init(adreno_dev);
+	return 0;
 };
 
 /**
+ * adreno_perfcounter_close: Release counters initialized by
+ * adreno_perfcounter_init
+ * @device: device to release counters for
+ *
+ */
+static void adreno_perfcounter_close(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	if (adreno_dev->gpudev->perfcounter_close)
+		return adreno_dev->gpudev->perfcounter_close(adreno_dev);
+}
+
+/**
  * adreno_perfcounter_start: Enable performance counters
  * @adreno_dev: Adreno device to configure
  *
  * Ensure all performance counters are enabled that are allocated.  Since
  * the device was most likely stopped, we can't trust that the counters
  * are still valid so make it so.
+ * Returns 0 on success or an error code on failure
  */
 
-static void adreno_perfcounter_start(struct adreno_device *adreno_dev)
+static int adreno_perfcounter_start(struct adreno_device *adreno_dev)
 {
 	struct adreno_perfcounters *counters = adreno_dev->gpudev->perfcounters;
 	struct adreno_perfcount_group *group;
 	unsigned int i, j;
+	int ret = 0;
 
-	/* perfcounter start does nothing on a2xx */
-	if (adreno_is_a2xx(adreno_dev))
-		return;
+	if (NULL == counters)
+		return 0;
 
 	/* group id iter */
 	for (i = 0; i < counters->group_count; i++) {
@@ -269,11 +286,15 @@
 				continue;
 
 			if (adreno_dev->gpudev->perfcounter_enable)
-				adreno_dev->gpudev->perfcounter_enable(
+				ret = adreno_dev->gpudev->perfcounter_enable(
 					adreno_dev, i, j,
 					group->regs[j].countable);
+				if (ret)
+					goto done;
 		}
 	}
+done:
+	return ret;
 }
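adreno_perfcounter_init() and adreno_perfcounter_start() now return a status instead of void, and the a2xx special cases give way to a NULL check on the per-GPU perfcounters table. A simplified sketch of the resulting error-propagating bring-up (the wrapper function is hypothetical):

/*
 * Hypothetical sketch: chain the now-failable counter init/start
 * steps, unwinding on the first error.
 */
static int example_counters_bringup(struct kgsl_device *device)
{
	int ret;

	ret = adreno_perfcounter_init(device);
	if (ret)
		return ret;

	return adreno_perfcounter_start(ADRENO_DEVICE(device));
}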
 
 /**
@@ -287,7 +308,7 @@
  */
 
 int adreno_perfcounter_read_group(struct adreno_device *adreno_dev,
-	struct kgsl_perfcounter_read_group *reads, unsigned int count)
+	struct kgsl_perfcounter_read_group __user *reads, unsigned int count)
 {
 	struct adreno_perfcounters *counters = adreno_dev->gpudev->perfcounters;
 	struct adreno_perfcount_group *group;
@@ -295,8 +316,7 @@
 	unsigned int i, j;
 	int ret = 0;
 
-	/* perfcounter get/put/query/read not allowed on a2xx */
-	if (adreno_is_a2xx(adreno_dev))
+	if (NULL == counters)
 		return -EINVAL;
 
 	/* sanity check for later */
@@ -307,12 +327,6 @@
 	if (reads == NULL || count == 0 || count > 100)
 		return -EINVAL;
 
-	/* verify valid inputs group ids and countables */
-	for (i = 0; i < count; i++) {
-		if (reads[i].groupid >= counters->group_count)
-			return -EINVAL;
-	}
-
 	list = kmalloc(sizeof(struct kgsl_perfcounter_read_group) * count,
 			GFP_KERNEL);
 	if (!list)
@@ -326,8 +340,15 @@
 
 	/* list iterator */
 	for (j = 0; j < count; j++) {
+
 		list[j].value = 0;
 
+		/* Verify that the group ID is within range */
+		if (list[j].groupid >= counters->group_count) {
+			ret = -EINVAL;
+			goto done;
+		}
+
 		group = &(counters->groups[list[j].groupid]);
 
 		/* group/counter iterator */
@@ -335,8 +356,7 @@
 			if (group->regs[i].countable == list[j].countable) {
 				list[j].value =
 					adreno_dev->gpudev->perfcounter_read(
-					adreno_dev, list[j].groupid,
-					i, group->regs[i].offset);
+					adreno_dev, list[j].groupid, i);
 				break;
 			}
 		}
@@ -372,8 +392,7 @@
 	if (name == NULL)
 		return -EINVAL;
 
-	/* perfcounter get/put/query not allowed on a2xx */
-	if (adreno_is_a2xx(adreno_dev))
+	if (NULL == counters)
 		return -EINVAL;
 
 	for (i = 0; i < counters->group_count; ++i) {
@@ -398,8 +417,7 @@
 {
 	struct adreno_perfcounters *counters = adreno_dev->gpudev->perfcounters;
 
-	/* perfcounter get/put/query not allowed on a2xx */
-	if (adreno_is_a2xx(adreno_dev))
+	if (NULL == counters)
 		return NULL;
 
 	if (groupid >= counters->group_count)
@@ -429,8 +447,7 @@
 
 	*max_counters = 0;
 
-	/* perfcounter get/put/query not allowed on a2xx */
-	if (adreno_is_a2xx(adreno_dev))
+	if (NULL == counters)
 		return -EINVAL;
 
 	if (groupid >= counters->group_count)
@@ -479,13 +496,13 @@
 	struct adreno_perfcounters *counters = adreno_dev->gpudev->perfcounters;
 	struct adreno_perfcount_group *group;
 	unsigned int i, empty = -1;
+	int ret = 0;
 
 	/* always clear return variables */
 	if (offset)
 		*offset = 0;
 
-	/* perfcounter get/put/query not allowed on a2xx */
-	if (adreno_is_a2xx(adreno_dev))
+	if (NULL == counters)
 		return -EINVAL;
 
 	if (groupid >= counters->group_count)
@@ -520,6 +537,11 @@
 	if (empty == -1)
 		return -EBUSY;
 
+	/* enable the new counter */
+	ret = adreno_dev->gpudev->perfcounter_enable(adreno_dev, groupid, empty,
+		countable);
+	if (ret)
+		return ret;
 	/* initialize the new counter */
 	group->regs[empty].countable = countable;
 
@@ -532,14 +554,10 @@
 		group->regs[empty].usercount = 1;
 	}
 
-	/* enable the new counter */
-	adreno_dev->gpudev->perfcounter_enable(adreno_dev, groupid, empty,
-		countable);
-
 	if (offset)
 		*offset = group->regs[empty].offset;
 
-	return 0;
+	return ret;
 }
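The reordering in adreno_perfcounter_get() is deliberate: the hardware counter is enabled before any bookkeeping is committed, so a failed enable leaves the slot unclaimed with nothing to roll back. The shape of that pattern, as a self-contained hedged sketch (hypothetical types, not the driver's exact code):

/*
 * Hedged sketch: perform the hardware side effect first, commit
 * bookkeeping only on success so a failure leaves the slot free.
 */
struct example_slot {
	unsigned int countable;
	unsigned int kernelcount;
};

static int example_claim_slot(struct example_slot *slot,
			      unsigned int countable,
			      int (*hw_enable)(unsigned int countable))
{
	int ret = hw_enable(countable);

	if (ret)
		return ret;	/* slot untouched, nothing to undo */

	slot->countable = countable;
	slot->kernelcount = 1;
	return 0;
}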
 
 
@@ -561,8 +579,7 @@
 
 	unsigned int i;
 
-	/* perfcounter get/put/query not allowed on a2xx */
-	if (adreno_is_a2xx(adreno_dev))
+	if (NULL == counters)
 		return -EINVAL;
 
 	if (groupid >= counters->group_count)
@@ -601,23 +618,9 @@
 
 static irqreturn_t adreno_irq_handler(struct kgsl_device *device)
 {
-	irqreturn_t result;
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
 
-	result = adreno_dev->gpudev->irq_handler(adreno_dev);
-
-	device->pwrctrl.irq_last = 1;
-	if (device->requested_state == KGSL_STATE_NONE) {
-		kgsl_pwrctrl_request_state(device, KGSL_STATE_NAP);
-		queue_work(device->work_queue, &device->idle_check_ws);
-	}
-
-	/* Reset the time-out in our idle timer */
-	mod_timer_pending(&device->idle_timer,
-		jiffies + device->pwrctrl.interval_timeout);
-	mod_timer_pending(&device->hang_timer,
-		(jiffies + msecs_to_jiffies(KGSL_TIMEOUT_PART)));
-	return result;
+	return adreno_dev->gpudev->irq_handler(adreno_dev);
 }
 
 static void adreno_cleanup_pt(struct kgsl_device *device,
@@ -930,7 +933,7 @@
 		adreno_dev->dev.cff_dump_enable);
 }
 
-static void adreno_iommu_setstate(struct kgsl_device *device,
+static int adreno_iommu_setstate(struct kgsl_device *device,
 					unsigned int context_id,
 					uint32_t flags)
 {
@@ -943,22 +946,26 @@
 	struct kgsl_context *context;
 	struct adreno_context *adreno_ctx = NULL;
 	struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;
+	unsigned int result;
 
 	if (adreno_use_default_setstate(adreno_dev)) {
 		kgsl_mmu_device_setstate(&device->mmu, flags);
-		return;
+		return 0;
 	}
 	num_iommu_units = kgsl_mmu_get_num_iommu_units(&device->mmu);
 
 	context = kgsl_context_get(device, context_id);
-	if (context == NULL)
-		return;
+	if (context == NULL) {
+		kgsl_mmu_device_setstate(&device->mmu, KGSL_CONTEXT_INVALID);
+		return -EINVAL;
+	}
 
 	adreno_ctx = ADRENO_CONTEXT(context);
 
-	if (kgsl_mmu_enable_clk(&device->mmu,
-				KGSL_IOMMU_CONTEXT_USER))
-		return;
+	result = kgsl_mmu_enable_clk(&device->mmu, KGSL_IOMMU_CONTEXT_USER);
+
+	if (result)
+		goto done;
 
 	pt_val = kgsl_mmu_get_pt_base_addr(&device->mmu,
 				device->mmu.hwpagetable);
@@ -992,14 +999,24 @@
 	 * This returns the per context timestamp but we need to
 	 * use the global timestamp for iommu clock disablement
 	 */
-	adreno_ringbuffer_issuecmds(device, adreno_ctx, KGSL_CMD_FLAGS_PMODE,
-			&link[0], sizedwords);
+	result = adreno_ringbuffer_issuecmds(device, adreno_ctx,
+			KGSL_CMD_FLAGS_PMODE, &link[0], sizedwords);
 
-	kgsl_mmu_disable_clk_on_ts(&device->mmu, rb->global_ts, true);
+	/*
+	 * On error disable the IOMMU clock right away otherwise turn it off
+	 * after the command has been retired
+	 */
+	if (result)
+		kgsl_mmu_disable_clk_on_ts(&device->mmu, 0, false);
+	else
+		kgsl_mmu_disable_clk_on_ts(&device->mmu, rb->global_ts, true);
+
+done:
 	kgsl_context_put(context);
+	return result;
 }
 
-static void adreno_gpummu_setstate(struct kgsl_device *device,
+static int adreno_gpummu_setstate(struct kgsl_device *device,
 					unsigned int context_id,
 					uint32_t flags)
 {
@@ -1010,6 +1027,7 @@
 	unsigned int mh_mmu_invalidate = 0x00000003; /*invalidate all and tc */
 	struct kgsl_context *context;
 	struct adreno_context *adreno_ctx = NULL;
+	int ret = 0;
 
 	/*
 	 * Fix target freeze issue by adding TLB flush for each submit
@@ -1026,7 +1044,8 @@
 	if (!adreno_use_default_setstate(adreno_dev)) {
 		context = kgsl_context_get(device, context_id);
 		if (context == NULL)
-			return;
+			return -EINVAL;
+
 		adreno_ctx = ADRENO_CONTEXT(context);
 
 		if (flags & KGSL_MMUFLAGS_PTUPDATE) {
@@ -1101,7 +1120,7 @@
 			sizedwords += 2;
 		}
 
-		adreno_ringbuffer_issuecmds(device, adreno_ctx,
+		ret = adreno_ringbuffer_issuecmds(device, adreno_ctx,
 					KGSL_CMD_FLAGS_PMODE,
 					&link[0], sizedwords);
 
@@ -1109,9 +1128,11 @@
 	} else {
 		kgsl_mmu_device_setstate(&device->mmu, flags);
 	}
+
+	return ret;
 }
 
-static void adreno_setstate(struct kgsl_device *device,
+static int adreno_setstate(struct kgsl_device *device,
 			unsigned int context_id,
 			uint32_t flags)
 {
@@ -1120,6 +1141,8 @@
 		return adreno_gpummu_setstate(device, context_id, flags);
 	else if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_get_mmutype())
 		return adreno_iommu_setstate(device, context_id, flags);
+
+	return 0;
 }
 
 static unsigned int
@@ -1238,8 +1261,11 @@
 	adreno_dev->gmem_size = adreno_gpulist[i].gmem_size;
 	adreno_dev->pm4_jt_idx = adreno_gpulist[i].pm4_jt_idx;
 	adreno_dev->pm4_jt_addr = adreno_gpulist[i].pm4_jt_addr;
+	adreno_dev->pm4_bstrp_size = adreno_gpulist[i].pm4_bstrp_size;
 	adreno_dev->pfp_jt_idx = adreno_gpulist[i].pfp_jt_idx;
 	adreno_dev->pfp_jt_addr = adreno_gpulist[i].pfp_jt_addr;
+	adreno_dev->pfp_bstrp_size = adreno_gpulist[i].pfp_bstrp_size;
+	adreno_dev->pfp_bstrp_ver = adreno_gpulist[i].pfp_bstrp_ver;
 	adreno_dev->gpulist_index = i;
 	/*
 	 * Initialize uninitialized gpu registers, only needs to be done once
@@ -1338,7 +1364,12 @@
 		&pdata->init_level))
 		pdata->init_level = 1;
 
-	if (adreno_of_read_property(parent, "qcom,step-pwrlevel",
+	/*
+	 * qcom,step-pwrlevel isn't required so don't spam the kernel log
+	 * if it isn't found
+	 */
+
+	if (of_property_read_u32(parent, "qcom,step-pwrlevel",
 		&pdata->step_mul))
 		pdata->step_mul = 1;
 
@@ -1595,6 +1626,10 @@
 	if (status)
 		goto error_close_rb;
 
+	status = adreno_dispatcher_init(adreno_dev);
+	if (status)
+		goto error_close_device;
+
 	adreno_debugfs_init(device);
 	adreno_profile_init(device);
 
@@ -1610,6 +1645,8 @@
 
 	return 0;
 
+error_close_device:
+	kgsl_device_platform_remove(device);
 error_close_rb:
 	adreno_ringbuffer_close(&adreno_dev->ringbuffer);
 error:
@@ -1632,9 +1669,13 @@
 	kgsl_pwrscale_detach_policy(device);
 	kgsl_pwrscale_close(device);
 
+	adreno_dispatcher_close(adreno_dev);
 	adreno_ringbuffer_close(&adreno_dev->ringbuffer);
+	adreno_perfcounter_close(device);
 	kgsl_device_platform_remove(device);
 
+	clear_bit(ADRENO_DEVICE_INITIALIZED, &adreno_dev->priv);
+
 	return 0;
 }
 
@@ -1642,9 +1683,15 @@
 {
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
 	int i;
+	int ret;
 
-	if (KGSL_STATE_DUMP_AND_FT != device->state)
-		kgsl_pwrctrl_set_state(device, KGSL_STATE_INIT);
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_INIT);
+	/*
+	 * Initialization only needs to be done once until the device is
+	 * shut down
+	 */
+	if (test_bit(ADRENO_DEVICE_INITIALIZED, &adreno_dev->priv))
+		return 0;
 
 	/* Power up the device */
 	kgsl_pwrctrl_enable(device);
@@ -1670,6 +1717,7 @@
 		BUG_ON(1);
 	}
 
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_INIT);
 	/*
 	 * Check if firmware supports the sync lock PM4 packets needed
 	 * for IOMMUv1
@@ -1697,7 +1745,9 @@
 	for (i = 6; i < FT_DETECT_REGS_COUNT; i++)
 		ft_detect_regs[i] = 0;
 
-	adreno_perfcounter_init(device);
+	ret = adreno_perfcounter_init(device);
+	if (ret)
+		goto done;
 
 	/* Power down the device */
 	kgsl_pwrctrl_disable(device);
@@ -1706,7 +1756,9 @@
 	if (adreno_is_a3xx(adreno_dev))
 		adreno_a3xx_pwron_fixup_init(adreno_dev);
 
-	return 0;
+	set_bit(ADRENO_DEVICE_INITIALIZED, &adreno_dev->priv);
+done:
+	return ret;
 }
 
 static int adreno_start(struct kgsl_device *device)
@@ -1718,13 +1770,15 @@
 
 	kgsl_cffdump_open(device);
 
-	if (KGSL_STATE_DUMP_AND_FT != device->state)
-		kgsl_pwrctrl_set_state(device, KGSL_STATE_INIT);
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_INIT);
 
 	regulator_left_on = (regulator_is_enabled(device->pwrctrl.gpu_reg) ||
 				(device->pwrctrl.gpu_cx &&
 				regulator_is_enabled(device->pwrctrl.gpu_cx)));
 
+	/* Clear any GPU faults that might have been left over */
+	adreno_clear_gpu_fault(adreno_dev);
+
 	/* Power up the device */
 	kgsl_pwrctrl_enable(device);
 
@@ -1769,19 +1823,23 @@
 	kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_ON);
 	device->ftbl->irqctrl(device, 1);
 
-	status = adreno_ringbuffer_start(&adreno_dev->ringbuffer);
+	status = adreno_ringbuffer_cold_start(&adreno_dev->ringbuffer);
 	if (status)
 		goto error_irq_off;
 
-	mod_timer(&device->hang_timer,
-		(jiffies + msecs_to_jiffies(KGSL_TIMEOUT_PART)));
+	status = adreno_perfcounter_start(adreno_dev);
+	if (status)
+		goto error_rb_stop;
 
-	adreno_perfcounter_start(adreno_dev);
+	/* Start the dispatcher */
+	adreno_dispatcher_start(device);
 
 	device->reset_counter++;
 
 	return 0;
 
+error_rb_stop:
+	adreno_ringbuffer_stop(&adreno_dev->ringbuffer);
 error_irq_off:
 	kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF);
 
@@ -1789,11 +1847,9 @@
 	kgsl_mmu_stop(&device->mmu);
 
 error_clk_off:
-	if (KGSL_STATE_DUMP_AND_FT != device->state) {
-		kgsl_pwrctrl_disable(device);
-		/* set the state back to original state */
-		kgsl_pwrctrl_set_state(device, state);
-	}
+	kgsl_pwrctrl_disable(device);
+	/* set the state back to original state */
+	kgsl_pwrctrl_set_state(device, state);
 
 	return status;
 }
@@ -1807,6 +1863,7 @@
 
 	adreno_dev->drawctxt_active = NULL;
 
+	adreno_dispatcher_stop(adreno_dev);
 	adreno_ringbuffer_stop(&adreno_dev->ringbuffer);
 
 	kgsl_mmu_stop(&device->mmu);
@@ -1814,7 +1871,6 @@
 	device->ftbl->irqctrl(device, 0);
 	kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF);
 	del_timer_sync(&device->idle_timer);
-	del_timer_sync(&device->hang_timer);
 
 	adreno_ocmem_gmem_free(adreno_dev);
 
@@ -1826,921 +1882,60 @@
 	return 0;
 }
 
-/*
- * Set the reset status of all contexts to
- * INNOCENT_CONTEXT_RESET_EXT except for the bad context
- * since thats the guilty party, if fault tolerance failed then
- * mark all as guilty
- */
-
-static int _mark_context_status(int id, void *ptr, void *data)
-{
-	unsigned int ft_status = *((unsigned int *) data);
-	struct kgsl_context *context = ptr;
-	struct adreno_context *adreno_context = ADRENO_CONTEXT(context);
-
-	if (ft_status) {
-		context->reset_status =
-				KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT;
-		adreno_context->flags |= CTXT_FLAGS_GPU_HANG;
-	} else if (KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT !=
-		context->reset_status) {
-		if (adreno_context->flags & (CTXT_FLAGS_GPU_HANG |
-			CTXT_FLAGS_GPU_HANG_FT))
-			context->reset_status =
-			KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT;
-		else
-			context->reset_status =
-			KGSL_CTX_STAT_INNOCENT_CONTEXT_RESET_EXT;
-	}
-
-	return 0;
-}
-
-static void adreno_mark_context_status(struct kgsl_device *device,
-					int ft_status)
-{
-	/* Mark the status for all the contexts in the device */
-
-	read_lock(&device->context_lock);
-	idr_for_each(&device->context_idr, _mark_context_status, &ft_status);
-	read_unlock(&device->context_lock);
-}
-
-/*
- * For hung contexts set the current memstore value to the most recent issued
- * timestamp - this resets the status and lets the system continue on
- */
-
-static int _set_max_ts(int id, void *ptr, void *data)
-{
-	struct kgsl_device *device = data;
-	struct kgsl_context *context = ptr;
-	struct adreno_context *drawctxt = ADRENO_CONTEXT(context);
-
-	if (drawctxt && drawctxt->flags & CTXT_FLAGS_GPU_HANG) {
-		kgsl_sharedmem_writel(device, &device->memstore,
-			KGSL_MEMSTORE_OFFSET(context->id,
-			soptimestamp), drawctxt->timestamp);
-		kgsl_sharedmem_writel(device, &device->memstore,
-			KGSL_MEMSTORE_OFFSET(context->id,
-			eoptimestamp), drawctxt->timestamp);
-	}
-
-	return 0;
-}
-
-static void adreno_set_max_ts_for_bad_ctxs(struct kgsl_device *device)
-{
-	read_lock(&device->context_lock);
-	idr_for_each(&device->context_idr, _set_max_ts, device);
-	read_unlock(&device->context_lock);
-}
-
-static void adreno_destroy_ft_data(struct adreno_ft_data *ft_data)
-{
-	vfree(ft_data->rb_buffer);
-	vfree(ft_data->bad_rb_buffer);
-	vfree(ft_data->good_rb_buffer);
-}
-
-static int _find_start_of_cmd_seq(struct adreno_ringbuffer *rb,
-					unsigned int *ptr,
-					bool inc)
-{
-	int status = -EINVAL;
-	unsigned int val1;
-	unsigned int size = rb->buffer_desc.size;
-	unsigned int start_ptr = *ptr;
-
-	while ((start_ptr / sizeof(unsigned int)) != rb->wptr) {
-		if (inc)
-			start_ptr = adreno_ringbuffer_inc_wrapped(start_ptr,
-									size);
-		else
-			start_ptr = adreno_ringbuffer_dec_wrapped(start_ptr,
-									size);
-		kgsl_sharedmem_readl(&rb->buffer_desc, &val1, start_ptr);
-		/* Ensure above read is finished before next read */
-		rmb();
-		if (KGSL_CMD_IDENTIFIER == val1) {
-			if ((start_ptr / sizeof(unsigned int)) != rb->wptr)
-				start_ptr = adreno_ringbuffer_dec_wrapped(
-							start_ptr, size);
-				*ptr = start_ptr;
-				status = 0;
-				break;
-		}
-	}
-	return status;
-}
-
-static int _find_cmd_seq_after_eop_ts(struct adreno_ringbuffer *rb,
-					unsigned int *rb_rptr,
-					unsigned int global_eop,
-					bool inc)
-{
-	int status = -EINVAL;
-	unsigned int temp_rb_rptr = *rb_rptr;
-	unsigned int size = rb->buffer_desc.size;
-	unsigned int val[3];
-	int i = 0;
-	bool check = false;
-
-	if (inc && temp_rb_rptr / sizeof(unsigned int) != rb->wptr)
-		return status;
-
-	do {
-		/*
-		 * when decrementing we need to decrement first and
-		 * then read make sure we cover all the data
-		 */
-		if (!inc)
-			temp_rb_rptr = adreno_ringbuffer_dec_wrapped(
-					temp_rb_rptr, size);
-		kgsl_sharedmem_readl(&rb->buffer_desc, &val[i],
-					temp_rb_rptr);
-		/* Ensure above read is finished before next read */
-		rmb();
-
-		if (check && ((inc && val[i] == global_eop) ||
-			(!inc && (val[i] ==
-			cp_type3_packet(CP_MEM_WRITE, 2) ||
-			val[i] == CACHE_FLUSH_TS)))) {
-			/* decrement i, i.e i = (i - 1 + 3) % 3 if
-			 * we are going forward, else increment i */
-			i = (i + 2) % 3;
-			if (val[i] == rb->device->memstore.gpuaddr +
-				KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
-						eoptimestamp)) {
-				int j = ((i + 2) % 3);
-				if ((inc && (val[j] == CACHE_FLUSH_TS ||
-						val[j] == cp_type3_packet(
-							CP_MEM_WRITE, 2))) ||
-					(!inc && val[j] == global_eop)) {
-						/* Found the global eop */
-						status = 0;
-						break;
-				}
-			}
-			/* if no match found then increment i again
-			 * since we decremented before matching */
-			i = (i + 1) % 3;
-		}
-		if (inc)
-			temp_rb_rptr = adreno_ringbuffer_inc_wrapped(
-						temp_rb_rptr, size);
-
-		i = (i + 1) % 3;
-		if (2 == i)
-			check = true;
-	} while (temp_rb_rptr / sizeof(unsigned int) != rb->wptr);
-	/* temp_rb_rptr points to the command stream after global eop,
-	 * move backward till the start of command sequence */
-	if (!status) {
-		status = _find_start_of_cmd_seq(rb, &temp_rb_rptr, false);
-		if (!status) {
-			*rb_rptr = temp_rb_rptr;
-			KGSL_FT_INFO(rb->device,
-			"Offset of cmd sequence after eop timestamp: 0x%x\n",
-			temp_rb_rptr / sizeof(unsigned int));
-		}
-	}
-	if (status)
-		KGSL_FT_ERR(rb->device,
-		"Failed to find the command sequence after eop timestamp %x\n",
-		global_eop);
-	return status;
-}
-
-static int _find_hanging_ib_sequence(struct adreno_ringbuffer *rb,
-				unsigned int *rb_rptr,
-				unsigned int ib1)
-{
-	int status = -EINVAL;
-	unsigned int temp_rb_rptr = *rb_rptr;
-	unsigned int size = rb->buffer_desc.size;
-	unsigned int val[2];
-	int i = 0;
-	bool check = false;
-	bool ctx_switch = false;
-
-	while (temp_rb_rptr / sizeof(unsigned int) != rb->wptr) {
-		kgsl_sharedmem_readl(&rb->buffer_desc, &val[i], temp_rb_rptr);
-		/* Ensure above read is finished before next read */
-		rmb();
-
-		if (check && val[i] == ib1) {
-			/* decrement i, i.e i = (i - 1 + 2) % 2 */
-			i = (i + 1) % 2;
-			if (adreno_cmd_is_ib(val[i])) {
-				/* go till start of command sequence */
-				status = _find_start_of_cmd_seq(rb,
-						&temp_rb_rptr, false);
-
-				KGSL_FT_INFO(rb->device,
-				"Found the hanging IB at offset 0x%x\n",
-				temp_rb_rptr / sizeof(unsigned int));
-				break;
-			}
-			/* if no match the increment i since we decremented
-			 * before checking */
-			i = (i + 1) % 2;
-		}
-		/* Make sure you do not encounter a context switch twice, we can
-		 * encounter it once for the bad context as the start of search
-		 * can point to the context switch */
-		if (val[i] == KGSL_CONTEXT_TO_MEM_IDENTIFIER) {
-			if (ctx_switch) {
-				KGSL_FT_ERR(rb->device,
-				"Context switch encountered before bad "
-				"IB found\n");
-				break;
-			}
-			ctx_switch = true;
-		}
-		i = (i + 1) % 2;
-		if (1 == i)
-			check = true;
-		temp_rb_rptr = adreno_ringbuffer_inc_wrapped(temp_rb_rptr,
-								size);
-	}
-	if  (!status)
-		*rb_rptr = temp_rb_rptr;
-	return status;
-}
-
-static void adreno_setup_ft_data(struct kgsl_device *device,
-					struct adreno_ft_data *ft_data)
-{
-	int ret = 0;
-	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
-	struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;
-	struct kgsl_context *context;
-	struct adreno_context *adreno_context;
-	unsigned int rb_rptr = rb->wptr * sizeof(unsigned int);
-
-	memset(ft_data, 0, sizeof(*ft_data));
-	ft_data->start_of_replay_cmds = 0xFFFFFFFF;
-	ft_data->replay_for_snapshot = 0xFFFFFFFF;
-
-	adreno_readreg(adreno_dev, ADRENO_REG_CP_IB1_BASE, &ft_data->ib1);
-
-	kgsl_sharedmem_readl(&device->memstore, &ft_data->context_id,
-			KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
-			current_context));
-
-	kgsl_sharedmem_readl(&device->memstore,
-			&ft_data->global_eop,
-			KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
-			eoptimestamp));
-
-	/* Ensure context id and global eop ts read complete */
-	rmb();
-
-	ft_data->rb_buffer = vmalloc(rb->buffer_desc.size);
-	if (!ft_data->rb_buffer) {
-		KGSL_MEM_ERR(device, "vmalloc(%d) failed\n",
-				rb->buffer_desc.size);
-		return;
-	}
-
-	ft_data->bad_rb_buffer = vmalloc(rb->buffer_desc.size);
-	if (!ft_data->bad_rb_buffer) {
-		KGSL_MEM_ERR(device, "vmalloc(%d) failed\n",
-				rb->buffer_desc.size);
-		return;
-	}
-
-	ft_data->good_rb_buffer = vmalloc(rb->buffer_desc.size);
-	if (!ft_data->good_rb_buffer) {
-		KGSL_MEM_ERR(device, "vmalloc(%d) failed\n",
-				rb->buffer_desc.size);
-		return;
-	}
-	ft_data->status = 0;
-
-	/* find the start of bad command sequence in rb */
-	context = kgsl_context_get(device, ft_data->context_id);
-
-	ft_data->ft_policy = adreno_dev->ft_policy;
-
-	if (!ft_data->ft_policy)
-		ft_data->ft_policy = KGSL_FT_DEFAULT_POLICY;
-
-	/* Look for the command stream that is right after the global eop */
-	ret = _find_cmd_seq_after_eop_ts(rb, &rb_rptr,
-					ft_data->global_eop + 1, false);
-	if (ret) {
-		ft_data->ft_policy |= KGSL_FT_TEMP_DISABLE;
-		goto done;
-	} else {
-		ft_data->start_of_replay_cmds = rb_rptr;
-		ft_data->ft_policy &= ~KGSL_FT_TEMP_DISABLE;
-	}
-
-	if (context) {
-		adreno_context = ADRENO_CONTEXT(context);
-		if (adreno_context->flags & CTXT_FLAGS_PREAMBLE) {
-			if (ft_data->ib1) {
-				ret = _find_hanging_ib_sequence(rb,
-						&rb_rptr, ft_data->ib1);
-				if (ret) {
-					KGSL_FT_ERR(device,
-					"Start not found for replay IB seq\n");
-					goto done;
-				}
-				ft_data->start_of_replay_cmds = rb_rptr;
-				ft_data->replay_for_snapshot = rb_rptr;
-			}
-		}
-	}
-
-done:
-	kgsl_context_put(context);
-}
-
-static int
-_adreno_check_long_ib(struct kgsl_device *device)
-{
-	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
-	unsigned int curr_global_ts = 0;
-
-	/* check if the global ts is still the same */
-	kgsl_sharedmem_readl(&device->memstore,
-			&curr_global_ts,
-			KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
-			eoptimestamp));
-	/* Ensure above read is finished before long ib check */
-	rmb();
-
-	/* Mark long ib as handled */
-	adreno_dev->long_ib = 0;
-
-	if (curr_global_ts == adreno_dev->long_ib_ts) {
-		KGSL_FT_ERR(device,
-			"IB ran too long, invalidate ctxt\n");
-		return 1;
-	} else {
-		/* Do nothing GPU has gone ahead */
-		KGSL_FT_INFO(device, "false long ib detection return\n");
-		return 0;
-	}
-}
-
 /**
- * adreno_soft_reset() -  Do a soft reset of the GPU hardware
- * @device: KGSL device to soft reset
+ * adreno_reset() - Helper function to reset the GPU
+ * @device: Pointer to the KGSL device structure for the GPU
  *
- * "soft reset" the GPU hardware - this is a fast path GPU reset
- * The GPU hardware is reset but we never pull power so we can skip
- * a lot of the standard adreno_stop/adreno_start sequence
+ * Try to reset the GPU to recover from a fault.  First, try to do a low latency
+ * soft reset.  If the soft reset fails for some reason, then bring out the big
+ * guns and toggle the footswitch.
  */
-int adreno_soft_reset(struct kgsl_device *device)
+int adreno_reset(struct kgsl_device *device)
 {
-	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
-	int ret;
+	int ret = -EINVAL;
+	struct kgsl_mmu *mmu = &device->mmu;
+	int i = 0;
 
-	if (!adreno_dev->gpudev->soft_reset) {
-		dev_WARN_ONCE(device->dev, 1, "Soft reset not supported");
-		return -EINVAL;
+	/* Try soft reset first, for non mmu fault case only */
+	if (!atomic_read(&mmu->fault)) {
+		ret = adreno_soft_reset(device);
+		if (ret)
+			KGSL_DEV_ERR_ONCE(device, "Device soft reset failed\n");
 	}
+	if (ret) {
+		/* If soft reset failed/skipped, then pull the power */
+		adreno_stop(device);
 
-	if (adreno_dev->drawctxt_active)
-		kgsl_context_put(&adreno_dev->drawctxt_active->base);
+		/* Keep trying to start the device until it works */
+		for (i = 0; i < NUM_TIMES_RESET_RETRY; i++) {
+			ret = adreno_start(device);
+			if (!ret)
+				break;
 
-	adreno_dev->drawctxt_active = NULL;
-
-	/* Stop the ringbuffer */
-	adreno_ringbuffer_stop(&adreno_dev->ringbuffer);
-
-	/* Delete the idle timer */
-	del_timer_sync(&device->idle_timer);
-
-	/* Make sure we are totally awake */
-	kgsl_pwrctrl_enable(device);
-
-	/* Reset the GPU */
-	adreno_dev->gpudev->soft_reset(adreno_dev);
-
-	/* Reinitialize the GPU */
-	adreno_dev->gpudev->start(adreno_dev);
-
-	/* Enable IRQ */
-	kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_ON);
-	device->ftbl->irqctrl(device, 1);
-
-	/*
-	 * If we have offsets for the jump tables we can try to do a warm start,
-	 * otherwise do a full ringbuffer restart
-	 */
-	if (adreno_dev->pm4_jt_idx)
-		ret = adreno_ringbuffer_warm_start(&adreno_dev->ringbuffer);
-	else
-		ret = adreno_ringbuffer_start(&adreno_dev->ringbuffer);
-
+			msleep(20);
+		}
+	}
 	if (ret)
 		return ret;
 
-	device->reset_counter++;
-
-	return 0;
-}
-
-static int
-_adreno_ft_restart_device(struct kgsl_device *device,
-			   struct kgsl_context *context)
-{
-	/* If device soft reset fails try hard reset */
-	if (adreno_soft_reset(device))
-		KGSL_DEV_ERR_ONCE(device, "Device soft reset failed\n");
-	else
-		/* Soft reset is successful */
-		goto reset_done;
-
-	/* restart device */
-	if (adreno_stop(device)) {
-		KGSL_FT_ERR(device, "Device stop failed\n");
-		return 1;
-	}
-
-	if (adreno_init(device)) {
-		KGSL_FT_ERR(device, "Device init failed\n");
-		return 1;
-	}
-
-	if (adreno_start(device)) {
-		KGSL_FT_ERR(device, "Device start failed\n");
-		return 1;
-	}
-
-reset_done:
-	if (context)
-		kgsl_mmu_setstate(&device->mmu, context->pagetable,
-				KGSL_MEMSTORE_GLOBAL);
-
-	/* If iommu is used then we need to make sure that the iommu clocks
-	 * are on since there could be commands in pipeline that touch iommu */
-	if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_get_mmutype()) {
-		if (kgsl_mmu_enable_clk(&device->mmu,
-				KGSL_IOMMU_CONTEXT_USER))
-			return 1;
-	}
-
-	return 0;
-}
-
-static inline void
-_adreno_debug_ft_info(struct kgsl_device *device,
-			struct adreno_ft_data *ft_data)
-{
+	if (0 != i)
+		KGSL_DRV_WARN(device, "Device hard reset tried %d tries\n", i);
 
 	/*
-	 * Dumping rb is a very useful tool to debug FT.
-	 * It will tell us if we are extracting the rb correctly
-	 * NOP'ing the right IB, skipping the EOF correctly etc.
+	 * If active_cnt is non-zero then the system was active before
+	 * going into a reset - put it back in that state
 	 */
-	if (device->ft_log >= 7)  {
 
-		/* Print fault tolerance data here */
-		KGSL_FT_INFO(device, "Temp RB buffer size 0x%X\n",
-			ft_data->rb_size);
-		adreno_dump_rb(device, ft_data->rb_buffer,
-			ft_data->rb_size<<2, 0, ft_data->rb_size);
+	if (atomic_read(&device->active_cnt))
+		kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE);
 
-		KGSL_FT_INFO(device, "Bad RB buffer size 0x%X\n",
-			ft_data->bad_rb_size);
-		adreno_dump_rb(device, ft_data->bad_rb_buffer,
-			ft_data->bad_rb_size<<2, 0, ft_data->bad_rb_size);
+	/* Set the page table back to the default page table */
+	kgsl_mmu_setstate(&device->mmu, device->mmu.defaultpagetable,
+			KGSL_MEMSTORE_GLOBAL);
 
-		KGSL_FT_INFO(device, "Good RB buffer size 0x%X\n",
-			ft_data->good_rb_size);
-		adreno_dump_rb(device, ft_data->good_rb_buffer,
-			ft_data->good_rb_size<<2, 0, ft_data->good_rb_size);
-
-	}
-}
-
-static int
-_adreno_ft_resubmit_rb(struct kgsl_device *device,
-			struct adreno_ringbuffer *rb,
-			struct kgsl_context *context,
-			struct adreno_ft_data *ft_data,
-			unsigned int *buff, unsigned int size)
-{
-	unsigned int ret = 0;
-	unsigned int retry_num = 0;
-
-	_adreno_debug_ft_info(device, ft_data);
-
-	do {
-		ret = _adreno_ft_restart_device(device, context);
-		if (ret == 0)
-			break;
-		/*
-		 * If device restart fails sleep for 20ms before
-		 * attempting restart. This allows GPU HW to settle
-		 * and improve the chances of next restart to be
-		 * successful.
-		 */
-		msleep(20);
-		KGSL_FT_ERR(device, "Retry device restart %d\n", retry_num);
-		retry_num++;
-	} while (retry_num < 4);
-
-	if (ret) {
-		KGSL_FT_ERR(device, "Device restart failed\n");
-		BUG_ON(1);
-		goto done;
-	}
-
-	if (size) {
-
-		/* submit commands and wait for them to pass */
-		adreno_ringbuffer_restore(rb, buff, size);
-
-		ret = adreno_idle(device);
-	}
-
-done:
 	return ret;
 }
 
-
-static int
-_adreno_ft(struct kgsl_device *device,
-			struct adreno_ft_data *ft_data)
-{
-	int ret = 0, i;
-	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
-	struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;
-	struct kgsl_context *context;
-	struct adreno_context *adreno_context = NULL;
-	struct adreno_context *last_active_ctx = adreno_dev->drawctxt_active;
-	unsigned int long_ib = 0;
-	static int no_context_ft;
-	struct kgsl_mmu *mmu = &device->mmu;
-
-	context = kgsl_context_get(device, ft_data->context_id);
-
-	if (context == NULL) {
-		KGSL_FT_ERR(device, "Last context unknown id:%d\n",
-			ft_data->context_id);
-		if (no_context_ft) {
-			/*
-			 * If 2 consecutive no context ft occurred then
-			 * just reset GPU
-			 */
-			no_context_ft = 0;
-			goto play_good_cmds;
-		}
-	} else {
-		no_context_ft = 0;
-		adreno_context = ADRENO_CONTEXT(context);
-		adreno_context->flags |= CTXT_FLAGS_GPU_HANG;
-		/*
-		 * set the invalid ts flag to 0 for this context since we have
-		 * detected a hang for it
-		 */
-		context->wait_on_invalid_ts = false;
-
-		if (!(adreno_context->flags & CTXT_FLAGS_PER_CONTEXT_TS)) {
-			ft_data->status = 1;
-			KGSL_FT_ERR(device, "Fault tolerance not supported\n");
-			goto play_good_cmds;
-		}
-
-		/*
-		 *  This flag will be set by userspace for contexts
-		 *  that do not want to be fault tolerant (ex: OPENCL)
-		 */
-		if (adreno_context->flags & CTXT_FLAGS_NO_FAULT_TOLERANCE) {
-			ft_data->status = 1;
-			KGSL_FT_ERR(device,
-			"No FT set for this context play good cmds\n");
-			goto play_good_cmds;
-		}
-
-	}
-
-	/* Check if we detected a long running IB, if false return */
-	if ((adreno_context) && (adreno_dev->long_ib)) {
-		long_ib = _adreno_check_long_ib(device);
-		if (!long_ib) {
-			adreno_context->flags &= ~CTXT_FLAGS_GPU_HANG;
-			return 0;
-		}
-	}
-
-	/*
-	 * Extract valid contents from rb which can still be executed after
-	 * hang
-	 */
-	adreno_ringbuffer_extract(rb, ft_data);
-
-	/* If long IB detected do not attempt replay of bad cmds */
-	if (long_ib) {
-		ft_data->status = 1;
-		_adreno_debug_ft_info(device, ft_data);
-		goto play_good_cmds;
-	}
-
-	if ((ft_data->ft_policy & KGSL_FT_DISABLE) ||
-		(ft_data->ft_policy & KGSL_FT_TEMP_DISABLE)) {
-		KGSL_FT_ERR(device, "NO FT policy play only good cmds\n");
-		ft_data->status = 1;
-		goto play_good_cmds;
-	}
-
-	/* Do not try to replay if hang is due to a pagefault */
-	if (context && test_bit(KGSL_CONTEXT_PAGEFAULT, &context->priv)) {
-		/* Resume MMU */
-		mmu->mmu_ops->mmu_pagefault_resume(mmu);
-		if ((ft_data->context_id == context->id) &&
-			(ft_data->global_eop == context->pagefault_ts)) {
-			ft_data->ft_policy &= ~KGSL_FT_REPLAY;
-			KGSL_FT_ERR(device, "MMU fault skipping replay\n");
-		}
-		clear_bit(KGSL_CONTEXT_PAGEFAULT, &context->priv);
-	}
-
-	if (ft_data->ft_policy & KGSL_FT_REPLAY) {
-		ret = _adreno_ft_resubmit_rb(device, rb, context, ft_data,
-				ft_data->bad_rb_buffer, ft_data->bad_rb_size);
-
-		if (ret) {
-			KGSL_FT_ERR(device, "Replay status: 1\n");
-			ft_data->status = 1;
-		} else
-			goto play_good_cmds;
-	}
-
-	if (ft_data->ft_policy & KGSL_FT_SKIPIB) {
-		for (i = 0; i < ft_data->bad_rb_size; i++) {
-			if ((ft_data->bad_rb_buffer[i] ==
-					CP_HDR_INDIRECT_BUFFER_PFD) &&
-				(ft_data->bad_rb_buffer[i+1] == ft_data->ib1)) {
-
-				ft_data->bad_rb_buffer[i] = cp_nop_packet(2);
-				ft_data->bad_rb_buffer[i+1] =
-							KGSL_NOP_IB_IDENTIFIER;
-				ft_data->bad_rb_buffer[i+2] =
-							KGSL_NOP_IB_IDENTIFIER;
-				break;
-			}
-		}
-
-		if ((i == (ft_data->bad_rb_size)) || (!ft_data->ib1)) {
-			KGSL_FT_ERR(device, "Bad IB to NOP not found\n");
-			ft_data->status = 1;
-			goto play_good_cmds;
-		}
-
-		ret = _adreno_ft_resubmit_rb(device, rb, context, ft_data,
-				ft_data->bad_rb_buffer, ft_data->bad_rb_size);
-
-		if (ret) {
-			KGSL_FT_ERR(device, "NOP faulty IB status: 1\n");
-			ft_data->status = 1;
-		} else {
-			ft_data->status = 0;
-			goto play_good_cmds;
-		}
-	}
-
-	if (ft_data->ft_policy & KGSL_FT_SKIPFRAME) {
-		for (i = 0; i < ft_data->bad_rb_size; i++) {
-			if (ft_data->bad_rb_buffer[i] ==
-					KGSL_END_OF_FRAME_IDENTIFIER) {
-				ft_data->bad_rb_buffer[0] = cp_nop_packet(i);
-				break;
-			}
-		}
-
-		/* EOF not found in RB, discard till EOF in
-		   next IB submission */
-		if (adreno_context && (i == ft_data->bad_rb_size)) {
-			adreno_context->flags |= CTXT_FLAGS_SKIP_EOF;
-			KGSL_FT_INFO(device,
-			"EOF not found in RB, skip next issueib till EOF\n");
-			ft_data->bad_rb_buffer[0] = cp_nop_packet(i);
-		}
-
-		ret = _adreno_ft_resubmit_rb(device, rb, context, ft_data,
-				ft_data->bad_rb_buffer, ft_data->bad_rb_size);
-
-		if (ret) {
-			KGSL_FT_ERR(device, "Skip EOF status: 1\n");
-			ft_data->status = 1;
-		} else {
-			ft_data->status = 0;
-			goto play_good_cmds;
-		}
-	}
-
-play_good_cmds:
-
-	if (ft_data->status)
-		KGSL_FT_ERR(device, "Bad context commands failed\n");
-	else {
-		KGSL_FT_INFO(device, "Bad context commands success\n");
-
-		if (adreno_context) {
-			adreno_context->flags = (adreno_context->flags &
-				~CTXT_FLAGS_GPU_HANG) | CTXT_FLAGS_GPU_HANG_FT;
-		}
-
-		if (last_active_ctx)
-			_kgsl_context_get(&last_active_ctx->base);
-
-		adreno_dev->drawctxt_active = last_active_ctx;
-	}
-
-	ret = _adreno_ft_resubmit_rb(device, rb, context, ft_data,
-			ft_data->good_rb_buffer, ft_data->good_rb_size);
-
-	if (ret) {
-		/*
-		 * If we fail here we can try to invalidate another
-		 * context and try fault tolerance again, although
-		 * we will only try ft with no context once to avoid
-		 * going into continuous loop of trying ft with no context
-		 */
-		if (!context)
-			no_context_ft = 1;
-		ret = -EAGAIN;
-		KGSL_FT_ERR(device, "Playing good commands unsuccessful\n");
-		goto done;
-	} else
-		KGSL_FT_INFO(device, "Playing good commands successful\n");
-
-	/* ringbuffer now has data from the last valid context id,
-	 * so restore the active_ctx to the last valid context */
-	if (ft_data->last_valid_ctx_id) {
-		struct kgsl_context *last_ctx = kgsl_context_get(device,
-			ft_data->last_valid_ctx_id);
-
-		adreno_dev->drawctxt_active = ADRENO_CONTEXT(last_ctx);
-	}
-
-done:
-	/* Turn off iommu clocks */
-	if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_get_mmutype())
-		kgsl_mmu_disable_clk_on_ts(&device->mmu, 0, false);
-
-	kgsl_context_put(context);
-	return ret;
-}
-
-static int
-adreno_ft(struct kgsl_device *device,
-			struct adreno_ft_data *ft_data)
-{
-	int ret = 0;
-	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
-	struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;
-
-	/*
-	 * If GPU FT is turned off do not run FT.
-	 * If GPU stall detection is suspected to be false,
-	 * we can use this option to confirm stall detection.
-	 */
-	if (ft_data->ft_policy & KGSL_FT_OFF) {
-		KGSL_FT_ERR(device, "GPU FT turned off\n");
-		return 0;
-	}
-
-	KGSL_FT_INFO(device,
-	"Start Parameters: IB1: 0x%X, "
-	"Bad context_id: %u, global_eop: 0x%x\n",
-	ft_data->ib1, ft_data->context_id, ft_data->global_eop);
-
-	KGSL_FT_INFO(device, "Last issued global timestamp: %x\n",
-			rb->global_ts);
-
-	/* We may need to replay commands multiple times based on whether
-	 * multiple contexts hang the GPU */
-	while (true) {
-
-		ret = _adreno_ft(device, ft_data);
-
-		if (-EAGAIN == ret) {
-			/* setup new fault tolerance parameters and retry, this
-			 * means more than 1 contexts are causing hang */
-			adreno_destroy_ft_data(ft_data);
-			adreno_setup_ft_data(device, ft_data);
-			KGSL_FT_INFO(device,
-			"Retry. Parameters: "
-			"IB1: 0x%X, Bad context_id: %u, global_eop: 0x%x\n",
-			ft_data->ib1, ft_data->context_id,
-			ft_data->global_eop);
-		} else {
-			break;
-		}
-	}
-
-	if (ret)
-		goto done;
-
-	/* Restore correct states after fault tolerance */
-	if (adreno_dev->drawctxt_active)
-		device->mmu.hwpagetable =
-			adreno_dev->drawctxt_active->base.pagetable;
-	else
-		device->mmu.hwpagetable = device->mmu.defaultpagetable;
-	kgsl_sharedmem_writel(device, &device->memstore,
-			KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
-			eoptimestamp), rb->global_ts);
-
-	/* switch to NULL ctxt */
-	if (adreno_dev->drawctxt_active != NULL)
-		adreno_drawctxt_switch(adreno_dev, NULL, 0);
-
-done:
-	adreno_set_max_ts_for_bad_ctxs(device);
-	adreno_mark_context_status(device, ret);
-	KGSL_FT_ERR(device, "policy 0x%X status 0x%x\n",
-			ft_data->ft_policy, ret);
-	return ret;
-}
-
-int
-adreno_dump_and_exec_ft(struct kgsl_device *device)
-{
-	int result = -ETIMEDOUT;
-	struct adreno_ft_data ft_data;
-	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
-	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
-	unsigned int curr_pwrlevel;
-
-	if (device->state == KGSL_STATE_HUNG)
-		goto done;
-	if (device->state == KGSL_STATE_DUMP_AND_FT) {
-		mutex_unlock(&device->mutex);
-		wait_for_completion(&device->ft_gate);
-		mutex_lock(&device->mutex);
-		if (device->state != KGSL_STATE_HUNG)
-			result = 0;
-	} else {
-		/*
-		 * While fault tolerance is happening we do not want the
-		 * idle_timer to fire and attempt to change any device state
-		 */
-		del_timer_sync(&device->idle_timer);
-
-		kgsl_pwrctrl_set_state(device, KGSL_STATE_DUMP_AND_FT);
-		INIT_COMPLETION(device->ft_gate);
-		/* Detected a hang */
-
-		kgsl_cffdump_hang(device);
-		/* Run fault tolerance at max power level */
-		curr_pwrlevel = pwr->active_pwrlevel;
-		kgsl_pwrctrl_pwrlevel_change(device, pwr->max_pwrlevel);
-
-		/* Get the fault tolerance data as soon as hang is detected */
-		adreno_setup_ft_data(device, &ft_data);
-
-		/*
-		 * If long ib is detected, do not attempt postmortem or
-		 * snapshot, if GPU is still executing commands
-		 * we will get errors
-		 */
-		if (!adreno_dev->long_ib) {
-			/*
-			 * Trigger an automatic dump of the state to
-			 * the console
-			 */
-			kgsl_postmortem_dump(device, 0);
-
-			/*
-			* Make a GPU snapshot.  For now, do it after the
-			* PM dump so we can at least be sure the PM dump
-			* will work as it always has
-			*/
-			kgsl_device_snapshot(device, 1);
-		}
-
-		result = adreno_ft(device, &ft_data);
-		adreno_destroy_ft_data(&ft_data);
-
-		/* restore power level */
-		kgsl_pwrctrl_pwrlevel_change(device, curr_pwrlevel);
-
-		if (result) {
-			kgsl_pwrctrl_set_state(device, KGSL_STATE_HUNG);
-		} else {
-			kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE);
-			mod_timer(&device->hang_timer,
-				(jiffies +
-				msecs_to_jiffies(KGSL_TIMEOUT_PART)));
-		}
-		complete_all(&device->ft_gate);
-	}
-done:
-	return result;
-}
-EXPORT_SYMBOL(adreno_dump_and_exec_ft);
-
 /**
  * _ft_sysfs_store() -  Common routine to write to FT sysfs files
  * @buf: value to write
@@ -3139,140 +2334,184 @@
 	return status;
 }
 
-static int adreno_ringbuffer_drain(struct kgsl_device *device,
-	unsigned int *regs)
+/**
+ * adreno_hw_isidle() - Check if the GPU core is idle
+ * @device: Pointer to the KGSL device structure for the GPU
+ *
+ * Return true if the RBBM status register for the GPU type indicates that the
+ * hardware is idle
+ */
+static bool adreno_hw_isidle(struct kgsl_device *device)
+{
+	unsigned int reg_rbbm_status;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	/* Don't consider ourselves idle if there is an IRQ pending */
+	if (adreno_dev->gpudev->irq_pending(adreno_dev))
+		return false;
+
+	adreno_readreg(adreno_dev, ADRENO_REG_RBBM_STATUS,
+		&reg_rbbm_status);
+
+	if (adreno_is_a2xx(adreno_dev)) {
+		if (reg_rbbm_status == 0x110)
+			return true;
+	} else if (adreno_is_a3xx(adreno_dev)) {
+		if (!(reg_rbbm_status & 0x80000000))
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * adreno_soft_reset() -  Do a soft reset of the GPU hardware
+ * @device: KGSL device to soft reset
+ *
+ * "soft reset" the GPU hardware - this is a fast path GPU reset
+ * The GPU hardware is reset but we never pull power so we can skip
+ * a lot of the standard adreno_stop/adreno_start sequence
+ */
+int adreno_soft_reset(struct kgsl_device *device)
 {
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
-	struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;
-	unsigned long wait = jiffies;
-	unsigned long timeout = jiffies + msecs_to_jiffies(ADRENO_IDLE_TIMEOUT);
-	unsigned int rptr;
+	int ret;
 
-	do {
-		/*
-		 * Wait is "jiffies" first time in the loop to start
-		 * GPU stall detection immediately.
-		 */
-		if (time_after(jiffies, wait)) {
-			/* Check to see if the core is hung */
-			if (adreno_ft_detect(device, regs))
-				return -ETIMEDOUT;
+	if (!adreno_dev->gpudev->soft_reset) {
+		dev_WARN_ONCE(device->dev, 1, "Soft reset not supported");
+		return -EINVAL;
+	}
 
-			wait = jiffies + msecs_to_jiffies(KGSL_TIMEOUT_PART);
-		}
-		rptr = adreno_get_rptr(rb);
-		if (time_after(jiffies, timeout)) {
-			KGSL_DRV_ERR(device, "rptr: %x, wptr: %x\n",
-				rptr, rb->wptr);
-			return -ETIMEDOUT;
-		}
-	} while (rptr != rb->wptr);
+	if (adreno_dev->drawctxt_active)
+		kgsl_context_put(&adreno_dev->drawctxt_active->base);
+
+	adreno_dev->drawctxt_active = NULL;
+
+	/* Stop the ringbuffer */
+	adreno_ringbuffer_stop(&adreno_dev->ringbuffer);
+
+	if (kgsl_pwrctrl_isenabled(device))
+		device->ftbl->irqctrl(device, 0);
+
+	kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF);
+
+	adreno_clear_gpu_fault(adreno_dev);
+
+	/* Delete the idle timer */
+	del_timer_sync(&device->idle_timer);
+
+	/* Make sure we are totally awake */
+	kgsl_pwrctrl_enable(device);
+
+	/* Reset the GPU */
+	adreno_dev->gpudev->soft_reset(adreno_dev);
+
+	/* Reinitialize the GPU */
+	adreno_dev->gpudev->start(adreno_dev);
+
+	/* Enable IRQ */
+	kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_ON);
+	device->ftbl->irqctrl(device, 1);
+
+	/*
+	 * If we have offsets for the jump tables we can try to do a warm start,
+	 * otherwise do a full ringbuffer restart
+	 */
+
+	if (adreno_dev->pm4_jt_idx)
+		ret = adreno_ringbuffer_warm_start(&adreno_dev->ringbuffer);
+	else
+		ret = adreno_ringbuffer_cold_start(&adreno_dev->ringbuffer);
+
+	if (ret)
+		return ret;
+
+	device->reset_counter++;
 
 	return 0;
 }
 
-/* Caller must hold the device mutex. */
+/**
+ * adreno_isidle() - return true if the GPU hardware is idle
+ * @device: Pointer to the KGSL device structure for the GPU
+ *
+ * Return true if the GPU hardware is idle and there are no commands pending in
+ * the ringbuffer
+ */
+bool adreno_isidle(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	unsigned int rptr;
+
+	if (!kgsl_pwrctrl_isenabled(device))
+		return true;
+
+	rptr = adreno_get_rptr(&adreno_dev->ringbuffer);
+
+	if (rptr == adreno_dev->ringbuffer.wptr)
+		return adreno_hw_isidle(device);
+
+	return false;
+}
+
+/**
+ * adreno_idle() - wait for the GPU hardware to go idle
+ * @device: Pointer to the KGSL device structure for the GPU
+ *
+ * Wait up to ADRENO_IDLE_TIMEOUT milliseconds for the GPU hardware to go quiet.
+ */
+
 int adreno_idle(struct kgsl_device *device)
 {
-	unsigned long wait_time;
-	unsigned long wait_time_part;
-	unsigned int prev_reg_val[FT_DETECT_REGS_COUNT];
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	unsigned long wait = jiffies + msecs_to_jiffies(ADRENO_IDLE_TIMEOUT);
 
-	memset(prev_reg_val, 0, sizeof(prev_reg_val));
+	/*
+	 * Make sure the device mutex is held so the dispatcher can't send any
+	 * more commands to the hardware
+	 */
 
-	kgsl_cffdump_regpoll(device,
-		adreno_getreg(adreno_dev, ADRENO_REG_RBBM_STATUS) << 2,
-		0x00000000, 0x80000000);
+	BUG_ON(!mutex_is_locked(&device->mutex));
 
-retry:
-	/* First, wait for the ringbuffer to drain */
-	if (adreno_ringbuffer_drain(device, prev_reg_val))
-		goto err;
+	if (adreno_is_a3xx(adreno_dev))
+		kgsl_cffdump_regpoll(device,
+			adreno_getreg(adreno_dev, ADRENO_REG_RBBM_STATUS) << 2,
+			0x00000000, 0x80000000);
+	else
+		kgsl_cffdump_regpoll(device,
+			adreno_getreg(adreno_dev, ADRENO_REG_RBBM_STATUS) << 2,
+			0x110, 0x110);
 
-	/* now, wait for the GPU to finish its operations */
-	wait_time = jiffies + msecs_to_jiffies(ADRENO_IDLE_TIMEOUT);
-	wait_time_part = jiffies + msecs_to_jiffies(KGSL_TIMEOUT_PART);
+	while (time_before(jiffies, wait)) {
+		/*
+		 * If we fault, stop waiting and return an error. The dispatcher
+		 * will clean up the fault from the work queue, but we need to
+		 * make sure we don't block it by waiting for an idle that
+		 * will never come.
+		 */
 
-	while (time_before(jiffies, wait_time)) {
+		if (adreno_gpu_fault(adreno_dev) != 0)
+			return -EDEADLK;
+
 		if (adreno_isidle(device))
 			return 0;
-
-		/* Dont wait for timeout, detect hang faster.  */
-		if (time_after(jiffies, wait_time_part)) {
-			wait_time_part = jiffies +
-				msecs_to_jiffies(KGSL_TIMEOUT_PART);
-			if ((adreno_ft_detect(device, prev_reg_val)))
-				goto err;
-		}
-
 	}
 
-err:
-	KGSL_DRV_ERR(device, "spun too long waiting for RB to idle\n");
-	if (KGSL_STATE_DUMP_AND_FT != device->state &&
-		!adreno_dump_and_exec_ft(device)) {
-		wait_time = jiffies + ADRENO_IDLE_TIMEOUT;
-		goto retry;
-	}
 	return -ETIMEDOUT;
 }
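adreno_idle() now polls in one bounded loop and fails fast with -EDEADLK when a GPU fault is pending, so callers must distinguish that from an ordinary -ETIMEDOUT. A hypothetical usage sketch (the caller function is an assumption; the locking requirement comes from the BUG_ON above):

/*
 * Hypothetical caller: the device mutex must be held across the wait,
 * and -EDEADLK means a fault is pending and the dispatcher's recovery
 * will clean it up from the work queue.
 */
static int example_wait_for_idle(struct kgsl_device *device)
{
	int ret;

	mutex_lock(&device->mutex);
	ret = adreno_idle(device);
	mutex_unlock(&device->mutex);

	if (ret == -EDEADLK)
		return ret;	/* fault pending: let recovery run */

	return ret;		/* 0 or -ETIMEDOUT */
}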
 
 /**
- * is_adreno_rbbm_status_idle - Check if GPU core is idle by probing
- * rbbm_status register
- * @device - Pointer to the GPU device whose idle status is to be
- * checked
- * @returns - Returns whether the core is idle (based on rbbm_status)
- * false if the core is active, true if the core is idle
+ * adreno_drain() - Drain the dispatch queue
+ * @device: Pointer to the KGSL device structure for the GPU
+ *
+ * Drain the dispatcher of existing command batches.  This halts
+ * additional commands from being issued until the gate is completed.
  */
-static bool is_adreno_rbbm_status_idle(struct kgsl_device *device)
+static int adreno_drain(struct kgsl_device *device)
 {
-	unsigned int reg_rbbm_status;
-	bool status = false;
-	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	INIT_COMPLETION(device->cmdbatch_gate);
 
-	/* Is the core idle? */
-	adreno_readreg(adreno_dev, ADRENO_REG_RBBM_STATUS,
-				&reg_rbbm_status);
-
-	if (adreno_is_a2xx(adreno_dev)) {
-		if (reg_rbbm_status == 0x110)
-			status = true;
-	} else {
-		if (!(reg_rbbm_status & 0x80000000))
-			status = true;
-	}
-	return status;
-}
-
-static unsigned int adreno_isidle(struct kgsl_device *device)
-{
-	int status = false;
-	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
-	struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;
-
-	/* If the device isn't active, don't force it on. */
-	if (kgsl_pwrctrl_isenabled(device)) {
-		/* Is the ring buffer is empty? */
-		unsigned int rptr = adreno_get_rptr(rb);
-		if (rptr == rb->wptr) {
-			/*
-			 * Are there interrupts pending? If so then pretend we
-			 * are not idle - this avoids the possiblity that we go
-			 * to a lower power state without handling interrupts
-			 * first.
-			 */
-
-			if (!adreno_dev->gpudev->irq_pending(adreno_dev)) {
-				/* Is the core idle? */
-				status = is_adreno_rbbm_status_idle(device);
-			}
-		}
-	} else {
-		status = true;
-	}
-	return status;
+	return 0;
 }
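The gate protocol implied here: adreno_drain() re-arms the completion so new submissions block, and the resume path is assumed to signal it once draining is done. A minimal sketch of the assumed call sites (not part of this hunk):

	/* Submission path (assumed): block new command batches behind the gate */
	wait_for_completion(&device->cmdbatch_gate);

	/* Resume path (assumed): reopen the gate for blocked submitters */
	complete_all(&device->cmdbatch_gate);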
 
 /* Caller must hold the device mutex. */
@@ -3308,7 +2547,8 @@
 		if (context == NULL)
 			break;
 
-		if (kgsl_mmu_pt_equal(&device->mmu, context->pagetable,
+		if (kgsl_mmu_pt_equal(&device->mmu,
+					context->proc_priv->pagetable,
 					pt_base)) {
 			struct adreno_context *adreno_context;
 
@@ -3446,330 +2686,6 @@
 	__raw_writel(value, reg);
 }
 
-static unsigned int _get_context_id(struct kgsl_context *k_ctxt)
-{
-	unsigned int context_id = KGSL_MEMSTORE_GLOBAL;
-
-	if (k_ctxt != NULL) {
-		struct adreno_context *a_ctxt = ADRENO_CONTEXT(k_ctxt);
-		if (kgsl_context_detached(k_ctxt))
-			context_id = KGSL_CONTEXT_INVALID;
-		else if (a_ctxt->flags & CTXT_FLAGS_PER_CONTEXT_TS)
-			context_id = k_ctxt->id;
-	}
-
-	return context_id;
-}
-
-static unsigned int adreno_check_hw_ts(struct kgsl_device *device,
-		struct kgsl_context *context, unsigned int timestamp)
-{
-	int status = 0;
-	unsigned int ref_ts, enableflag;
-	unsigned int context_id = _get_context_id(context);
-
-	/*
-	 * If the context ID is invalid, we are in a race with
-	 * the context being destroyed by userspace so bail.
-	 */
-	if (context_id == KGSL_CONTEXT_INVALID) {
-		KGSL_DRV_WARN(device, "context was detached");
-		return -EINVAL;
-	}
-
-	status = kgsl_check_timestamp(device, context, timestamp);
-	if (status)
-		return status;
-
-	kgsl_sharedmem_readl(&device->memstore, &enableflag,
-			KGSL_MEMSTORE_OFFSET(context_id, ts_cmp_enable));
-	/*
-	 * Barrier is needed here to make sure the read from memstore
-	 * has posted
-	 */
-
-	mb();
-
-	if (enableflag) {
-		kgsl_sharedmem_readl(&device->memstore, &ref_ts,
-				KGSL_MEMSTORE_OFFSET(context_id,
-					ref_wait_ts));
-
-		/* Make sure the memstore read has posted */
-		mb();
-		if (timestamp_cmp(ref_ts, timestamp) >= 0) {
-			kgsl_sharedmem_writel(device, &device->memstore,
-					KGSL_MEMSTORE_OFFSET(context_id,
-						ref_wait_ts), timestamp);
-			/* Make sure the memstore write is posted */
-			wmb();
-		}
-	} else {
-		kgsl_sharedmem_writel(device, &device->memstore,
-				KGSL_MEMSTORE_OFFSET(context_id,
-					ref_wait_ts), timestamp);
-		enableflag = 1;
-		kgsl_sharedmem_writel(device, &device->memstore,
-				KGSL_MEMSTORE_OFFSET(context_id,
-					ts_cmp_enable), enableflag);
-
-		/* Make sure the memstore write gets posted */
-		wmb();
-
-		/*
-		 * submit a dummy packet so that even if all
-		 * commands upto timestamp get executed we will still
-		 * get an interrupt
-		 */
-
-		if (context && device->state != KGSL_STATE_SLUMBER) {
-			adreno_ringbuffer_issuecmds(device,
-					ADRENO_CONTEXT(context),
-					KGSL_CMD_FLAGS_GET_INT, NULL, 0);
-		}
-	}
-
-	return 0;
-}
-
-/* Return 1 if the event timestmp has already passed, 0 if it was marked */
-static int adreno_next_event(struct kgsl_device *device,
-		struct kgsl_event *event)
-{
-	return adreno_check_hw_ts(device, event->context, event->timestamp);
-}
-
-static int adreno_check_interrupt_timestamp(struct kgsl_device *device,
-		struct kgsl_context *context, unsigned int timestamp)
-{
-	int status;
-
-	mutex_lock(&device->mutex);
-	status = adreno_check_hw_ts(device, context, timestamp);
-	mutex_unlock(&device->mutex);
-
-	return status;
-}
-
-/*
- wait_event_interruptible_timeout checks for the exit condition before
- placing a process in wait q. For conditional interrupts we expect the
- process to already be in its wait q when its exit condition checking
- function is called.
-*/
-#define kgsl_wait_event_interruptible_timeout(wq, condition, timeout, io)\
-({									\
-	long __ret = timeout;						\
-	if (io)						\
-		__wait_io_event_interruptible_timeout(wq, condition, __ret);\
-	else						\
-		__wait_event_interruptible_timeout(wq, condition, __ret);\
-	__ret;								\
-})
-
-
-
-unsigned int adreno_ft_detect(struct kgsl_device *device,
-						unsigned int *prev_reg_val)
-{
-	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
-	struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;
-	unsigned int curr_reg_val[FT_DETECT_REGS_COUNT];
-	unsigned int fast_hang_detected = 1;
-	unsigned int i;
-	static unsigned long next_hang_detect_time;
-	static unsigned int prev_global_ts;
-	unsigned int curr_global_ts = 0;
-	unsigned int curr_context_id = 0;
-	static struct adreno_context *curr_context;
-	static struct kgsl_context *context;
-	static char pid_name[TASK_COMM_LEN] = "unknown";
-
-	if (!adreno_dev->fast_hang_detect)
-		fast_hang_detected = 0;
-
-	if (!(adreno_dev->ringbuffer.flags & KGSL_FLAGS_STARTED))
-		return 0;
-
-	if (is_adreno_rbbm_status_idle(device) &&
-		(kgsl_readtimestamp(device, NULL, KGSL_TIMESTAMP_RETIRED)
-		 == rb->global_ts)) {
-
-		/*
-		 * On A2XX if the RPTR != WPTR and the device is idle, then
-		 * the last write to WPTR probably failed to latch so write it
-		 * again
-		 */
-
-		if (adreno_is_a2xx(adreno_dev)) {
-			unsigned int rptr;
-			adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_RPTR,
-						&rptr);
-			if (rptr != adreno_dev->ringbuffer.wptr)
-				adreno_writereg(adreno_dev,
-					ADRENO_REG_CP_RB_WPTR,
-					adreno_dev->ringbuffer.wptr);
-		}
-
-		return 0;
-	}
-
-	/*
-	 * Time interval between hang detection should be KGSL_TIMEOUT_PART
-	 * or more, if next hang detection is requested < KGSL_TIMEOUT_PART
-	 * from the last time do nothing.
-	 */
-	if ((next_hang_detect_time) &&
-		(time_before(jiffies, next_hang_detect_time)))
-			return 0;
-	else
-		next_hang_detect_time = (jiffies +
-			msecs_to_jiffies(KGSL_TIMEOUT_PART-1));
-
-	/* Read the current Hang detect reg values here */
-	for (i = 0; i < FT_DETECT_REGS_COUNT; i++) {
-		if (ft_detect_regs[i] == 0)
-			continue;
-		kgsl_regread(device, ft_detect_regs[i],
-			&curr_reg_val[i]);
-	}
-
-	/* Read the current global timestamp here */
-	kgsl_sharedmem_readl(&device->memstore,
-			&curr_global_ts,
-			KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
-			eoptimestamp));
-	/* Make sure the memstore read has posted */
-	mb();
-
-	if (curr_global_ts == prev_global_ts) {
-
-		/* If we don't already have a good context, get it. */
-		if (kgsl_context_detached(context)) {
-			kgsl_context_put(context);
-			context = NULL;
-			curr_context = NULL;
-			strlcpy(pid_name, "unknown", sizeof(pid_name));
-
-			kgsl_sharedmem_readl(&device->memstore,
-				&curr_context_id,
-				KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
-				current_context));
-			/* Make sure the memstore read has posted */
-			mb();
-
-			context = kgsl_context_get(device, curr_context_id);
-			if (context != NULL) {
-				struct task_struct *task;
-				curr_context = ADRENO_CONTEXT(context);
-				curr_context->ib_gpu_time_used = 0;
-				task = find_task_by_vpid(context->pid);
-				if (task)
-					get_task_comm(pid_name, task);
-			} else {
-				KGSL_DRV_ERR(device,
-					"Fault tolerance no context found\n");
-			}
-		}
-		for (i = 0; i < FT_DETECT_REGS_COUNT; i++) {
-			if (ft_detect_regs[i] == 0)
-				continue;
-			if (curr_reg_val[i] != prev_reg_val[i])
-				fast_hang_detected = 0;
-		}
-
-		if (fast_hang_detected) {
-			KGSL_FT_ERR(device,
-				"Proc %s, ctxt_id %d ts %d triggered fault tolerance"
-				" on global ts %d\n",
-				pid_name, context ? context->id : 0,
-				(kgsl_readtimestamp(device, context,
-				KGSL_TIMESTAMP_RETIRED) + 1),
-				curr_global_ts + 1);
-			return 1;
-		}
-
-		if (curr_context != NULL) {
-
-			curr_context->ib_gpu_time_used += KGSL_TIMEOUT_PART;
-			KGSL_FT_INFO(device,
-			"Proc %s used GPU Time %d ms on timestamp 0x%X\n",
-			pid_name, curr_context->ib_gpu_time_used,
-			curr_global_ts+1);
-
-			if ((adreno_dev->long_ib_detect) &&
-				(!(curr_context->flags &
-				 CTXT_FLAGS_NO_FAULT_TOLERANCE)) &&
-				(curr_context->ib_gpu_time_used >
-					KGSL_TIMEOUT_LONG_IB_DETECTION) &&
-				(adreno_dev->long_ib_ts != curr_global_ts)) {
-						KGSL_FT_ERR(device,
-						"Proc %s, ctxt_id %d ts %d"
-						"used GPU for %d ms long ib "
-						"detected on global ts %d\n",
-						pid_name, context->id,
-						(kgsl_readtimestamp(device,
-						context,
-						KGSL_TIMESTAMP_RETIRED)+1),
-						curr_context->ib_gpu_time_used,
-						curr_global_ts+1);
-						adreno_dev->long_ib = 1;
-						adreno_dev->long_ib_ts =
-								curr_global_ts;
-						curr_context->ib_gpu_time_used =
-								0;
-						return 1;
-			}
-		}
-	} else {
-		/* GPU is moving forward */
-		prev_global_ts = curr_global_ts;
-		kgsl_context_put(context);
-		context = NULL;
-		curr_context = NULL;
-		strlcpy(pid_name, "unknown", sizeof(pid_name));
-		adreno_dev->long_ib = 0;
-		adreno_dev->long_ib_ts = 0;
-	}
-
-
-	/* If hangs are not detected copy the current reg values
-	 * to previous values and return no hang */
-	for (i = 0; i < FT_DETECT_REGS_COUNT; i++) {
-		if (ft_detect_regs[i] == 0)
-			continue;
-		prev_reg_val[i] = curr_reg_val[i];
-	}
-
-	return 0;
-}
-
-static int _check_pending_timestamp(struct kgsl_device *device,
-		struct kgsl_context *context, unsigned int timestamp)
-{
-	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
-	unsigned int context_id = _get_context_id(context);
-	unsigned int ts_issued;
-
-	if (context_id == KGSL_CONTEXT_INVALID)
-		return -EINVAL;
-
-	ts_issued = adreno_context_timestamp(context, &adreno_dev->ringbuffer);
-
-	if (timestamp_cmp(timestamp, ts_issued) <= 0)
-		return 0;
-
-	if (context && !context->wait_on_invalid_ts) {
-		KGSL_DRV_ERR(device, "Cannot wait for invalid ts <%d:0x%x>, last issued ts <%d:0x%x>\n",
-			context_id, timestamp, context_id, ts_issued);
-
-			/* Only print this message once */
-			context->wait_on_invalid_ts = true;
-	}
-
-	return -EINVAL;
-}
-
 /**
  * adreno_waittimestamp - sleep while waiting for the specified timestamp
  * @device - pointer to a KGSL device structure
@@ -3777,147 +2693,44 @@
  * @timestamp - GPU timestamp to wait for
  * @msecs - amount of time to wait (in milliseconds)
  *
- * Wait 'msecs' milliseconds for the specified timestamp to expire. Wake up
- * every KGSL_TIMEOUT_PART milliseconds to check for a device hang and process
- * one if it happened.  Otherwise, spend most of our time in an interruptible
- * wait for the timestamp interrupt to be processed.  This function must be
- * called with the mutex already held.
+ * Wait up to 'msecs' milliseconds for the specified timestamp to expire.
  */
 static int adreno_waittimestamp(struct kgsl_device *device,
-				struct kgsl_context *context,
-				unsigned int timestamp,
-				unsigned int msecs)
+		struct kgsl_context *context,
+		unsigned int timestamp,
+		unsigned int msecs)
 {
-	static unsigned int io_cnt;
-	struct adreno_context *adreno_ctx = context ? ADRENO_CONTEXT(context) :
-		NULL;
-	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
-	unsigned int context_id = _get_context_id(context);
-	unsigned int time_elapsed = 0;
-	unsigned int wait;
-	int ts_compare = 1;
-	int io, ret = -ETIMEDOUT;
+	int ret;
+	struct adreno_context *drawctxt;
 
-	if (context_id == KGSL_CONTEXT_INVALID) {
-		KGSL_DRV_WARN(device, "context was detached");
+	if (context == NULL) {
+		/* If someone is still using the deprecated API, complain once */
+		dev_WARN_ONCE(device->dev, 1,
+			"IOCTL_KGSL_DEVICE_WAITTIMESTAMP is deprecated\n");
+		return -ENOTTY;
+	}
+
+	/* Return -EINVAL if the context has been detached */
+	if (kgsl_context_detached(context))
 		return -EINVAL;
-	}
+
+	ret = adreno_drawctxt_wait(ADRENO_DEVICE(device), context,
+		timestamp, msecs_to_jiffies(msecs));
+
+	/* If the context got invalidated then return a specific error */
+	drawctxt = ADRENO_CONTEXT(context);
+
+	if (drawctxt->state == ADRENO_CONTEXT_STATE_INVALID)
+		ret = -EDEADLK;
 
 	/*
-	 * Check to see if the requested timestamp is "newer" then the last
-	 * timestamp issued. If it is complain once and return error.  Only
-	 * print the message once per context so that badly behaving
-	 * applications don't spam the logs
+	 * Return -EPROTO if the device has faulted since the last time we
+	 * checked.  Userspace uses this as a marker for performing post
+	 * fault activities
 	 */
 
-	if (adreno_ctx && !(adreno_ctx->flags & CTXT_FLAGS_USER_GENERATED_TS)) {
-		if (_check_pending_timestamp(device, context, timestamp))
-			return -EINVAL;
-
-		/* Reset the invalid timestamp flag on a valid wait */
-		context->wait_on_invalid_ts = false;
-	}
-
-	/*
-	 * On the first time through the loop only wait 100ms.
-	 * this gives enough time for the engine to start moving and oddly
-	 * provides better hang detection results than just going the full
-	 * KGSL_TIMEOUT_PART right off the bat. The exception to this rule
-	 * is if msecs happens to be < 100ms then just use 20ms or the msecs,
-	 * whichever is larger because anything less than 20 is unreliable
-	 */
-
-	if (msecs == 0 || msecs >= 100)
-		wait = 100;
-	else
-		wait = (msecs > 20) ? msecs : 20;
-
-	do {
-		long status;
-
-		/*
-		 * if the timestamp happens while we're not
-		 * waiting, there's a chance that an interrupt
-		 * will not be generated and thus the timestamp
-		 * work needs to be queued.
-		 */
-
-		if (kgsl_check_timestamp(device, context, timestamp)) {
-			queue_work(device->work_queue, &device->ts_expired_ws);
-			ret = 0;
-			break;
-		}
-
-		/*
-		 * For proper power accounting sometimes we need to call
-		 * io_wait_interruptible_timeout and sometimes we need to call
-		 * plain old wait_interruptible_timeout. We call the regular
-		 * timeout N times out of 100, where N is a number specified by
-		 * the current power level
-		 */
-
-		io_cnt = (io_cnt + 1) % 100;
-		io = (io_cnt < pwr->pwrlevels[pwr->active_pwrlevel].io_fraction)
-			? 0 : 1;
-
-		mutex_unlock(&device->mutex);
-
-		/* Wait for a timestamp event */
-		status = kgsl_wait_event_interruptible_timeout(
-			device->wait_queue,
-			adreno_check_interrupt_timestamp(device, context,
-				timestamp), msecs_to_jiffies(wait), io);
-
-		mutex_lock(&device->mutex);
-
-		/*
-		 * If status is non zero then either the condition was satisfied
-		 * or there was an error.  In either event, this is the end of
-		 * the line for us
-		 */
-
-		if (status != 0) {
-			ret = (status > 0) ? 0 : (int) status;
-			break;
-		}
-		time_elapsed += wait;
-
-		/* If user specified timestamps are being used, wait at least
-		 * KGSL_SYNCOBJ_SERVER_TIMEOUT msecs for the user driver to
-		 * issue a IB for a timestamp before checking to see if the
-		 * current timestamp we are waiting for is valid or not
-		 */
-
-		if (ts_compare && (adreno_ctx &&
-			(adreno_ctx->flags & CTXT_FLAGS_USER_GENERATED_TS))) {
-			if (time_elapsed > KGSL_SYNCOBJ_SERVER_TIMEOUT) {
-				ret = _check_pending_timestamp(device, context,
-					timestamp);
-				if (ret)
-					break;
-
-				/* Don't do this check again */
-				ts_compare = 0;
-
-				/*
-				 * Reset the invalid timestamp flag on a valid
-				 * wait
-				 */
-				context->wait_on_invalid_ts = false;
-			}
-		}
-
-		/*
-		 * We want to wait the floor of KGSL_TIMEOUT_PART
-		 * and (msecs - time_elapsed).
-		 */
-
-		if (KGSL_TIMEOUT_PART < (msecs - time_elapsed))
-			wait = KGSL_TIMEOUT_PART;
-		else
-			wait = (msecs - time_elapsed);
-
-	} while (!msecs || time_elapsed < msecs);
+	if (!ret && test_and_clear_bit(ADRENO_CONTEXT_FAULT, &drawctxt->priv))
+		ret = -EPROTO;
 
 	return ret;
 }
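The distinct error codes let the caller tell wait outcomes apart. A hypothetical caller-side dispatch, derived from the return paths above (the -ETIMEDOUT case assumes adreno_drawctxt_wait() reports expiry that way):

	switch (ret) {
	case 0:		/* the timestamp retired normally */
		break;
	case -EDEADLK:	/* the context was invalidated by a GPU fault */
		break;
	case -EPROTO:	/* the device faulted since the last check */
		break;
	case -ETIMEDOUT: /* msecs expired before the timestamp retired */
		break;
	}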
@@ -3926,16 +2739,8 @@
 		struct kgsl_context *context, enum kgsl_timestamp_type type)
 {
 	unsigned int timestamp = 0;
-	unsigned int context_id = _get_context_id(context);
+	unsigned int id = context ? context->id : KGSL_MEMSTORE_GLOBAL;
 
-	/*
-	 * If the context ID is invalid, we are in a race with
-	 * the context being destroyed by userspace so bail.
-	 */
-	if (context_id == KGSL_CONTEXT_INVALID) {
-		KGSL_DRV_WARN(device, "context was detached");
-		return timestamp;
-	}
 	switch (type) {
 	case KGSL_TIMESTAMP_QUEUED: {
 		struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
@@ -3946,11 +2751,11 @@
 	}
 	case KGSL_TIMESTAMP_CONSUMED:
 		kgsl_sharedmem_readl(&device->memstore, &timestamp,
-			KGSL_MEMSTORE_OFFSET(context_id, soptimestamp));
+			KGSL_MEMSTORE_OFFSET(id, soptimestamp));
 		break;
 	case KGSL_TIMESTAMP_RETIRED:
 		kgsl_sharedmem_readl(&device->memstore, &timestamp,
-			KGSL_MEMSTORE_OFFSET(context_id, eoptimestamp));
+			KGSL_MEMSTORE_OFFSET(id, eoptimestamp));
 		break;
 	}
 
@@ -3991,8 +2796,17 @@
 	}
 	case IOCTL_KGSL_PERFCOUNTER_GET: {
 		struct kgsl_perfcounter_get *get = data;
+		/*
+		 * adreno_perfcounter_get() is called by kernel clients
+		 * during start(), so it cannot take an active count
+		 * itself; take the active count here in the ioctl path
+		 * instead.
+		 */
+		result = kgsl_active_count_get(device);
+		if (result)
+			break;
 		result = adreno_perfcounter_get(adreno_dev, get->groupid,
 			get->countable, &get->offset, PERFCOUNTER_FLAG_NONE);
+		kgsl_active_count_put(device);
 		break;
 	}
 	case IOCTL_KGSL_PERFCOUNTER_PUT: {
@@ -4010,8 +2824,12 @@
 	}
 	case IOCTL_KGSL_PERFCOUNTER_READ: {
 		struct kgsl_perfcounter_read *read = data;
+		result = kgsl_active_count_get(device);
+		if (result)
+			break;
 		result = adreno_perfcounter_read_group(adreno_dev,
 			read->reads, read->count);
+		kgsl_active_count_put(device);
 		break;
 	}
 	default:
@@ -4110,6 +2928,7 @@
 	.gpuid = adreno_gpuid,
 	.snapshot = adreno_snapshot,
 	.irq_handler = adreno_irq_handler,
+	.drain = adreno_drain,
 	/* Optional functions */
 	.setstate = adreno_setstate,
 	.drawctxt_create = adreno_drawctxt_create,
@@ -4117,7 +2936,8 @@
 	.drawctxt_destroy = adreno_drawctxt_destroy,
 	.setproperty = adreno_setproperty,
 	.postmortem_dump = adreno_dump,
-	.next_event = adreno_next_event,
+	.drawctxt_sched = adreno_drawctxt_sched,
+	.resume = adreno_dispatcher_start,
 };
 
 static struct platform_driver adreno_platform_driver = {
diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h
index a837574..418d230 100644
--- a/drivers/gpu/msm/adreno.h
+++ b/drivers/gpu/msm/adreno.h
@@ -35,13 +35,12 @@
 #define ADRENO_CHIPID_PATCH(_id) ((_id) & 0xFF)
 
 /* Flags to control command packet settings */
-#define KGSL_CMD_FLAGS_NONE             0x00000000
-#define KGSL_CMD_FLAGS_PMODE		0x00000001
-#define KGSL_CMD_FLAGS_INTERNAL_ISSUE	0x00000002
-#define KGSL_CMD_FLAGS_GET_INT		0x00000004
-#define KGSL_CMD_FLAGS_PROFILE		0x00000008
-#define KGSL_CMD_FLAGS_PWRON_FIXUP	0x00000010
-#define KGSL_CMD_FLAGS_EOF	        0x00000100
+#define KGSL_CMD_FLAGS_NONE             0
+#define KGSL_CMD_FLAGS_PMODE		BIT(0)
+#define KGSL_CMD_FLAGS_INTERNAL_ISSUE   BIT(1)
+#define KGSL_CMD_FLAGS_WFI              BIT(2)
+#define KGSL_CMD_FLAGS_PROFILE		BIT(3)
+#define KGSL_CMD_FLAGS_PWRON_FIXUP      BIT(4)
 
 /* Command identifiers */
 #define KGSL_CONTEXT_TO_MEM_IDENTIFIER	0x2EADBEEF
@@ -97,6 +96,56 @@
 	TRACE_BUS_CTL,
 };
 
+#define ADRENO_SOFT_FAULT BIT(0)
+#define ADRENO_HARD_FAULT BIT(1)
+#define ADRENO_TIMEOUT_FAULT BIT(2)
+#define ADRENO_IOMMU_PAGE_FAULT BIT(3)
+
+/*
+ * Maximum size of the dispatcher ringbuffer - the actual inflight size will be
+ * smaller than this, but this size allows for a wider range of inflight sizes
+ * to be chosen at runtime
+ */
+
+#define ADRENO_DISPATCH_CMDQUEUE_SIZE 128
+
+/**
+ * struct adreno_dispatcher - container for the adreno GPU dispatcher
+ * @mutex: Mutex to protect the structure
+ * @priv: Private flags for the dispatcher
+ * @timer: Timer to monitor the progress of the command batches
+ * @fault_timer: Timer used to detect a stalled GPU after a fault
+ * @inflight: Number of command batch operations pending in the ringbuffer
+ * @fault: Non-zero if a fault was detected
+ * @pending: Priority list of contexts waiting to submit command batches
+ * @plist_lock: Spin lock to protect the pending queue
+ * @cmdqueue: Queue of command batches currently in flight
+ * @head: Index of the head of the cmdqueue.  This is the oldest pending
+ * operation
+ * @tail: Index of the tail of the cmdqueue.  This is the most recently
+ * submitted operation
+ * @work: work_struct to put the dispatcher in a work queue
+ * @kobj: kobject for the dispatcher directory in the device sysfs node
+ */
+struct adreno_dispatcher {
+	struct mutex mutex;
+	unsigned long priv;
+	struct timer_list timer;
+	struct timer_list fault_timer;
+	unsigned int inflight;
+	atomic_t fault;
+	struct plist_head pending;
+	spinlock_t plist_lock;
+	struct kgsl_cmdbatch *cmdqueue[ADRENO_DISPATCH_CMDQUEUE_SIZE];
+	unsigned int head;
+	unsigned int tail;
+	struct work_struct work;
+	struct kobject kobj;
+};
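As a sketch of how a fixed-size cmdqueue indexed by head and tail typically advances - the helper below is hypothetical and only illustrates the ring arithmetic, not the dispatcher's actual submission path:

	/* Hypothetical helper, for illustration only */
	static int example_cmdqueue_push(struct adreno_dispatcher *dispatcher,
			struct kgsl_cmdbatch *cmdbatch)
	{
		unsigned int next = (dispatcher->tail + 1) %
			ADRENO_DISPATCH_CMDQUEUE_SIZE;

		/* Full when advancing the tail would collide with the head */
		if (next == dispatcher->head)
			return -EBUSY;

		dispatcher->cmdqueue[dispatcher->tail] = cmdbatch;
		dispatcher->tail = next;
		dispatcher->inflight++;

		return 0;
	}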
+
+enum adreno_dispatcher_flags {
+	ADRENO_DISPATCHER_POWER = 0,
+};
+
 struct adreno_gpudev;
 
 struct adreno_device {
@@ -121,8 +170,11 @@
 	unsigned int wait_timeout;
 	unsigned int pm4_jt_idx;
 	unsigned int pm4_jt_addr;
+	unsigned int pm4_bstrp_size;
 	unsigned int pfp_jt_idx;
 	unsigned int pfp_jt_addr;
+	unsigned int pfp_bstrp_size;
+	unsigned int pfp_bstrp_ver;
 	unsigned int istore_size;
 	unsigned int pix_shader_start;
 	unsigned int instruction_size;
@@ -140,6 +192,7 @@
 	struct adreno_profile profile;
 	struct kgsl_memdesc pwron_fixup;
 	unsigned int pwron_fixup_dwords;
+	struct adreno_dispatcher dispatcher;
 };
 
 /**
@@ -151,6 +204,7 @@
 enum adreno_device_flags {
 	ADRENO_DEVICE_PWRON = 0,
 	ADRENO_DEVICE_PWRON_FIXUP = 1,
+	ADRENO_DEVICE_INITIALIZED = 2,
 };
 
 #define PERFCOUNTER_FLAG_NONE 0x0
@@ -164,12 +218,16 @@
  * @kernelcount: number of kernel users of the register
  * @usercount: number of user space users of the register
  * @offset: register hardware offset
+ * @load_bit: The bit number in the LOAD register that corresponds to this counter
+ * @select: The offset of the select register used to program the countable
  */
 struct adreno_perfcount_register {
 	unsigned int countable;
 	unsigned int kernelcount;
 	unsigned int usercount;
 	unsigned int offset;
+	int load_bit;
+	unsigned int select;
 };
 
 /**
@@ -194,6 +252,9 @@
 	unsigned int group_count;
 };
 
+#define ADRENO_PERFCOUNTER_GROUP(core, name) { core##_perfcounters_##name, \
+	ARRAY_SIZE(core##_perfcounters_##name), __stringify(name) }
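For reference, assuming an array named a3xx_perfcounters_cp exists, an invocation expands as:

	/* ADRENO_PERFCOUNTER_GROUP(a3xx, cp) expands to: */
	{ a3xx_perfcounters_cp, ARRAY_SIZE(a3xx_perfcounters_cp), "cp" }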
+
 /**
  * adreno_regs: List of registers that are used in kgsl driver for all
  * 3D devices. Each device type has different offset value for the same
@@ -250,6 +311,7 @@
 	ADRENO_REG_SQ_INST_STORE_MANAGMENT,
 	ADRENO_REG_TC_CNTL_STATUS,
 	ADRENO_REG_TP0_CHICKEN,
+	ADRENO_REG_RBBM_RBBM_CTL,
 	ADRENO_REG_REGISTER_MAX,
 };
 
@@ -281,23 +343,19 @@
 
 	/* GPU specific function hooks */
 	int (*ctxt_create)(struct adreno_device *, struct adreno_context *);
-	void (*ctxt_save)(struct adreno_device *, struct adreno_context *);
-	void (*ctxt_restore)(struct adreno_device *, struct adreno_context *);
-	void (*ctxt_draw_workaround)(struct adreno_device *,
-					struct adreno_context *);
 	irqreturn_t (*irq_handler)(struct adreno_device *);
 	void (*irq_control)(struct adreno_device *, int);
 	unsigned int (*irq_pending)(struct adreno_device *);
 	void * (*snapshot)(struct adreno_device *, void *, int *, int);
 	int (*rb_init)(struct adreno_device *, struct adreno_ringbuffer *);
-	void (*perfcounter_init)(struct adreno_device *);
+	int (*perfcounter_init)(struct adreno_device *);
+	void (*perfcounter_close)(struct adreno_device *);
 	void (*start)(struct adreno_device *);
 	unsigned int (*busy_cycles)(struct adreno_device *);
-	void (*perfcounter_enable)(struct adreno_device *, unsigned int group,
+	int (*perfcounter_enable)(struct adreno_device *, unsigned int group,
 		unsigned int counter, unsigned int countable);
 	uint64_t (*perfcounter_read)(struct adreno_device *adreno_dev,
-		unsigned int group, unsigned int counter,
-		unsigned int offset);
+		unsigned int group, unsigned int counter);
 	int (*coresight_enable) (struct kgsl_device *device);
 	void (*coresight_disable) (struct kgsl_device *device);
 	void (*coresight_config_debug_reg) (struct kgsl_device *device,
@@ -306,46 +364,6 @@
 	void (*postmortem_dump)(struct adreno_device *adreno_dev);
 };
 
-/*
- * struct adreno_ft_data - Structure that contains all information to
- * perform gpu fault tolerance
- * @ib1 - IB1 that the GPU was executing when hang happened
- * @context_id - Context which caused the hang
- * @global_eop - eoptimestamp at time of hang
- * @rb_buffer - Buffer that holds the commands from good contexts
- * @rb_size - Number of valid dwords in rb_buffer
- * @bad_rb_buffer - Buffer that holds commands from the hanging context
- * bad_rb_size - Number of valid dwords in bad_rb_buffer
- * @good_rb_buffer - Buffer that holds commands from good contexts
- * good_rb_size - Number of valid dwords in good_rb_buffer
- * @last_valid_ctx_id - The last context from which commands were placed in
- * ringbuffer before the GPU hung
- * @step - Current fault tolerance step being executed
- * @err_code - Fault tolerance error code
- * @fault - Indicates whether the hang was caused due to a pagefault
- * @start_of_replay_cmds - Offset in ringbuffer from where commands can be
- * replayed during fault tolerance
- * @replay_for_snapshot - Offset in ringbuffer where IB's can be saved for
- * replaying with snapshot
- */
-struct adreno_ft_data {
-	unsigned int ib1;
-	unsigned int context_id;
-	unsigned int global_eop;
-	unsigned int *rb_buffer;
-	unsigned int rb_size;
-	unsigned int *bad_rb_buffer;
-	unsigned int bad_rb_size;
-	unsigned int *good_rb_buffer;
-	unsigned int good_rb_size;
-	unsigned int last_valid_ctx_id;
-	unsigned int status;
-	unsigned int ft_policy;
-	unsigned int err_code;
-	unsigned int start_of_replay_cmds;
-	unsigned int replay_for_snapshot;
-};
-
 #define FT_DETECT_REGS_COUNT 12
 
 struct log_field {
@@ -354,21 +372,31 @@
 };
 
 /* Fault Tolerance policy flags */
-#define  KGSL_FT_OFF                      BIT(0)
-#define  KGSL_FT_REPLAY                   BIT(1)
-#define  KGSL_FT_SKIPIB                   BIT(2)
-#define  KGSL_FT_SKIPFRAME                BIT(3)
-#define  KGSL_FT_DISABLE                  BIT(4)
-#define  KGSL_FT_TEMP_DISABLE             BIT(5)
-#define  KGSL_FT_DEFAULT_POLICY           (KGSL_FT_REPLAY + KGSL_FT_SKIPIB)
+#define  KGSL_FT_OFF                      0
+#define  KGSL_FT_REPLAY                   1
+#define  KGSL_FT_SKIPIB                   2
+#define  KGSL_FT_SKIPFRAME                3
+#define  KGSL_FT_DISABLE                  4
+#define  KGSL_FT_TEMP_DISABLE             5
+#define  KGSL_FT_DEFAULT_POLICY (BIT(KGSL_FT_REPLAY) + BIT(KGSL_FT_SKIPIB))
+
+/* This internal bit is used to skip the PM dump on replayed command batches */
+#define  KGSL_FT_SKIP_PMDUMP              31
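Since the policy names are now bit numbers rather than masks, callers compose and test a policy with BIT(). A short sketch (the OR form below is equivalent to the addition used in KGSL_FT_DEFAULT_POLICY because the bits are distinct):

	/* Illustrative only: composing and testing a fault tolerance policy */
	unsigned long policy = BIT(KGSL_FT_REPLAY) | BIT(KGSL_FT_SKIPIB);

	if (policy & BIT(KGSL_FT_REPLAY)) {
		/* replay the faulting command batch */
	}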
 
 /* Pagefault policy flags */
 #define KGSL_FT_PAGEFAULT_INT_ENABLE         BIT(0)
 #define KGSL_FT_PAGEFAULT_GPUHALT_ENABLE     BIT(1)
 #define KGSL_FT_PAGEFAULT_LOG_ONE_PER_PAGE   BIT(2)
 #define KGSL_FT_PAGEFAULT_LOG_ONE_PER_INT    BIT(3)
-#define KGSL_FT_PAGEFAULT_DEFAULT_POLICY     (KGSL_FT_PAGEFAULT_INT_ENABLE + \
-					KGSL_FT_PAGEFAULT_GPUHALT_ENABLE)
+#define KGSL_FT_PAGEFAULT_DEFAULT_POLICY     KGSL_FT_PAGEFAULT_INT_ENABLE
+
+#define ADRENO_FT_TYPES \
+	{ BIT(KGSL_FT_OFF), "off" }, \
+	{ BIT(KGSL_FT_REPLAY), "replay" }, \
+	{ BIT(KGSL_FT_SKIPIB), "skipib" }, \
+	{ BIT(KGSL_FT_SKIPFRAME), "skipframe" }, \
+	{ BIT(KGSL_FT_DISABLE), "disable" }, \
+	{ BIT(KGSL_FT_TEMP_DISABLE), "temp" }
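A bit/name table in this form is the shape consumed by a tracepoint's __print_flags(), which is presumably its consumer here. A sketch of the assumed usage:

	/* Assumed tracepoint usage, illustrative only */
	TP_printk("ft policy=%s",
		__print_flags(__entry->ft_policy, "|", ADRENO_FT_TYPES))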
 
 extern struct adreno_gpudev adreno_a2xx_gpudev;
 extern struct adreno_gpudev adreno_a3xx_gpudev;
@@ -399,6 +427,7 @@
 int adreno_coresight_init(struct platform_device *pdev);
 
 int adreno_idle(struct kgsl_device *device);
+bool adreno_isidle(struct kgsl_device *device);
 
 void adreno_shadermem_regread(struct kgsl_device *device,
 						unsigned int offsetwords,
@@ -425,13 +454,23 @@
 void *adreno_snapshot(struct kgsl_device *device, void *snapshot, int *remain,
 		int hang);
 
-int adreno_dump_and_exec_ft(struct kgsl_device *device);
+void adreno_dispatcher_start(struct kgsl_device *device);
+int adreno_dispatcher_init(struct adreno_device *adreno_dev);
+void adreno_dispatcher_close(struct adreno_device *adreno_dev);
+int adreno_dispatcher_idle(struct adreno_device *adreno_dev,
+		unsigned int timeout);
+void adreno_dispatcher_irq_fault(struct kgsl_device *device);
+void adreno_dispatcher_stop(struct adreno_device *adreno_dev);
 
-void adreno_dump_rb(struct kgsl_device *device, const void *buf,
-			 size_t len, int start, int size);
+int adreno_dispatcher_queue_cmd(struct adreno_device *adreno_dev,
+		struct adreno_context *drawctxt, struct kgsl_cmdbatch *cmdbatch,
+		uint32_t *timestamp);
 
-unsigned int adreno_ft_detect(struct kgsl_device *device,
-						unsigned int *prev_reg_val);
+void adreno_dispatcher_schedule(struct kgsl_device *device);
+void adreno_dispatcher_pause(struct adreno_device *adreno_dev);
+void adreno_dispatcher_queue_context(struct kgsl_device *device,
+	struct adreno_context *drawctxt);
+int adreno_reset(struct kgsl_device *device);
 
 int adreno_ft_init_sysfs(struct kgsl_device *device);
 void adreno_ft_uninit_sysfs(struct kgsl_device *device);
@@ -549,9 +588,7 @@
 {
 	if (k_ctxt) {
 		struct adreno_context *a_ctxt = ADRENO_CONTEXT(k_ctxt);
-
-		if (a_ctxt->flags & CTXT_FLAGS_PER_CONTEXT_TS)
-			return a_ctxt->timestamp;
+		return a_ctxt->timestamp;
 	}
 	return rb->global_ts;
 }
@@ -755,4 +792,57 @@
 static inline void adreno_debugfs_init(struct kgsl_device *device) { }
 #endif
 
+/**
+ * adreno_gpu_fault() - Return the current state of the GPU
+ * @adreno_dev: A pointer to the adreno_device to query
+ *
+ * Return 0 if there is no fault, or a positive value indicating the type(s)
+ * of fault that occurred
+ */
+static inline unsigned int adreno_gpu_fault(struct adreno_device *adreno_dev)
+{
+	smp_rmb();
+	return atomic_read(&adreno_dev->dispatcher.fault);
+}
+
+/**
+ * adreno_set_gpu_fault() - Set the current fault status of the GPU
+ * @adreno_dev: A pointer to the adreno_device to set
+ * @state: fault state to set
+ */
+static inline void adreno_set_gpu_fault(struct adreno_device *adreno_dev,
+	int state)
+{
+	/* only set the fault bits without overwriting the other bits */
+	atomic_add(state, &adreno_dev->dispatcher.fault);
+	smp_wmb();
+}
+
+/**
+ * adreno_clear_gpu_fault() - Clear the GPU fault register
+ * @adreno_dev: A pointer to an adreno_device structure
+ *
+ * Clear the GPU fault status for the adreno device
+ */
+
+static inline void adreno_clear_gpu_fault(struct adreno_device *adreno_dev)
+{
+	atomic_set(&adreno_dev->dispatcher.fault, 0);
+	smp_wmb();
+}
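Taken together, the three helpers form a set/observe/clear protocol between the interrupt path and the dispatcher. A sketch of the assumed flow:

	/* IRQ or timer path (assumed): record the fault type */
	adreno_set_gpu_fault(adreno_dev, ADRENO_HARD_FAULT);

	/* Dispatcher path (assumed): observe, recover, then clear */
	if (adreno_gpu_fault(adreno_dev) != 0) {
		/* ... recover, e.g. via adreno_reset() ... */
		adreno_clear_gpu_fault(adreno_dev);
	}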
+
+/*
+ * adreno_bootstrap_ucode() - Checks if Ucode bootstrapping is supported
+ * @adreno_dev:		Pointer to the adreno device
+ */
+static inline int adreno_bootstrap_ucode(struct adreno_device *adreno_dev)
+{
+	if ((adreno_dev->pfp_bstrp_size) && (adreno_dev->pm4_bstrp_size)
+		&& (adreno_dev->pfp_fw_version >= adreno_dev->pfp_bstrp_ver))
+		return 1;
+	else
+		return 0;
+}
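A start-time caller is assumed to use this predicate to pick a microcode load path; both function names below are hypothetical:

	/* Hypothetical caller, for illustration only */
	if (adreno_bootstrap_ucode(adreno_dev))
		ret = adreno_bootstrap_ucode_load(adreno_dev);	/* assumed */
	else
		ret = adreno_legacy_ucode_load(adreno_dev);	/* assumed */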
+
 #endif /*__ADRENO_H */
diff --git a/drivers/gpu/msm/adreno_a2xx.c b/drivers/gpu/msm/adreno_a2xx.c
index 3d72c5c..24a0933 100644
--- a/drivers/gpu/msm/adreno_a2xx.c
+++ b/drivers/gpu/msm/adreno_a2xx.c
@@ -655,7 +655,7 @@
 	unsigned int addr = shadow->gmemshadow.gpuaddr;
 	unsigned int offset = (addr - (addr & 0xfffff000)) / bytesperpixel;
 
-	if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) {
+	if (!(drawctxt->base.flags & KGSL_CONTEXT_PREAMBLE)) {
 		/* Store TP0_CHICKEN register */
 		*cmds++ = cp_type3_packet(CP_REG_TO_MEM, 2);
 		*cmds++ = REG_TP0_CHICKEN;
@@ -864,7 +864,7 @@
 	unsigned int *cmds = shadow->gmem_restore_commands;
 	unsigned int *start = cmds;
 
-	if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) {
+	if (!(drawctxt->base.flags & KGSL_CONTEXT_PREAMBLE)) {
 		/* Store TP0_CHICKEN register */
 		*cmds++ = cp_type3_packet(CP_REG_TO_MEM, 2);
 		*cmds++ = REG_TP0_CHICKEN;
@@ -1334,8 +1334,6 @@
 static int a2xx_create_gpustate_shadow(struct adreno_device *adreno_dev,
 			struct adreno_context *drawctxt)
 {
-	drawctxt->flags |= CTXT_FLAGS_STATE_SHADOW;
-
 	/* build indirect command buffers to save & restore regs/constants */
 	build_regrestore_cmds(adreno_dev, drawctxt);
 	build_regsave_cmds(adreno_dev, drawctxt);
@@ -1355,14 +1353,12 @@
 	tmp_ctx.gmem_base = adreno_dev->gmem_base;
 
 	result = kgsl_allocate(&drawctxt->context_gmem_shadow.gmemshadow,
-		drawctxt->base.pagetable, drawctxt->context_gmem_shadow.size);
+		drawctxt->base.proc_priv->pagetable,
+		drawctxt->context_gmem_shadow.size);
 
 	if (result)
 		return result;
 
-	/* set the gmem shadow flag for the context */
-	drawctxt->flags |= CTXT_FLAGS_GMEM_SHADOW;
-
 	/* blank out gmem shadow. */
 	kgsl_sharedmem_set(drawctxt->base.device,
 			&drawctxt->context_gmem_shadow.gmemshadow, 0, 0,
@@ -1373,7 +1369,7 @@
 		&tmp_ctx.cmd);
 
 	/* build TP0_CHICKEN register restore command buffer */
-	if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE))
+	if (!(drawctxt->base.flags & KGSL_CONTEXT_PREAMBLE))
 		tmp_ctx.cmd = build_chicken_restore_cmds(drawctxt);
 
 	/* build indirect command buffers to save & restore gmem */
@@ -1397,11 +1393,56 @@
 	return 0;
 }
 
+static void a2xx_drawctxt_detach(struct adreno_context *drawctxt)
+{
+	kgsl_sharedmem_free(&drawctxt->gpustate);
+	kgsl_sharedmem_free(&drawctxt->context_gmem_shadow.gmemshadow);
+}
+
+static int a2xx_drawctxt_save(struct adreno_device *adreno_dev,
+			struct adreno_context *context);
+
+static int a2xx_drawctxt_restore(struct adreno_device *adreno_dev,
+			struct adreno_context *context);
+
+static int a2xx_drawctxt_draw_workaround(struct adreno_device *adreno_dev,
+					struct adreno_context *context);
+
+static const struct adreno_context_ops a225_preamble_ctx_ops = {
+	.restore = adreno_context_restore,
+	.draw_workaround = a2xx_drawctxt_draw_workaround,
+};
+
+static const struct adreno_context_ops a225_legacy_ctx_ops = {
+	.save = a2xx_drawctxt_save,
+	.restore = a2xx_drawctxt_restore,
+	.draw_workaround = a2xx_drawctxt_draw_workaround,
+	.detach = a2xx_drawctxt_detach,
+};
+
+static const struct adreno_context_ops a2xx_legacy_ctx_ops = {
+	.save = a2xx_drawctxt_save,
+	.restore = a2xx_drawctxt_restore,
+	.detach = a2xx_drawctxt_detach,
+};
+
+
 static int a2xx_drawctxt_create(struct adreno_device *adreno_dev,
 	struct adreno_context *drawctxt)
 {
 	int ret;
 
+	if (drawctxt->base.flags & KGSL_CONTEXT_PREAMBLE
+	   && drawctxt->base.flags & KGSL_CONTEXT_NO_GMEM_ALLOC) {
+		drawctxt->ops = (adreno_is_a225(adreno_dev))
+			?  &a225_preamble_ctx_ops : &adreno_preamble_ctx_ops;
+
+		return 0;
+	}
+
+	drawctxt->ops = (adreno_is_a225(adreno_dev))
+			?  &a225_legacy_ctx_ops : &a2xx_legacy_ctx_ops;
+
 	/*
 	 * Allocate memory for the GPU state and the context commands.
 	 * Despite the name, this is much more than just storage for
@@ -1410,7 +1451,7 @@
 	 */
 
 	ret = kgsl_allocate(&drawctxt->gpustate,
-		drawctxt->base.pagetable, _context_size(adreno_dev));
+		drawctxt->base.proc_priv->pagetable, _context_size(adreno_dev));
 
 	if (ret)
 		return ret;
@@ -1421,15 +1462,14 @@
 	tmp_ctx.cmd = tmp_ctx.start
 	    = (unsigned int *)((char *)drawctxt->gpustate.hostptr + CMD_OFFSET);
 
-	if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) {
+	if (!(drawctxt->base.flags & KGSL_CONTEXT_PREAMBLE)) {
 		ret = a2xx_create_gpustate_shadow(adreno_dev, drawctxt);
 		if (ret)
 			goto done;
 
-		drawctxt->flags |= CTXT_FLAGS_SHADER_SAVE;
 	}
 
-	if (!(drawctxt->flags & CTXT_FLAGS_NOGMEMALLOC)) {
+	if (!(drawctxt->base.flags & KGSL_CONTEXT_NO_GMEM_ALLOC)) {
 		ret = a2xx_create_gmem_shadow(adreno_dev, drawctxt);
 		if (ret)
 			goto done;
@@ -1451,7 +1491,7 @@
 	return ret;
 }
 
-static void a2xx_drawctxt_draw_workaround(struct adreno_device *adreno_dev,
+static int a2xx_drawctxt_draw_workaround(struct adreno_device *adreno_dev,
 					struct adreno_context *context)
 {
 	struct kgsl_device *device = &adreno_dev->dev;
@@ -1468,7 +1508,7 @@
 				ADRENO_NUM_CTX_SWITCH_ALLOWED_BEFORE_DRAW)
 			adreno_dev->gpudev->ctx_switches_since_last_draw = 0;
 		else
-			return;
+			return 0;
 		/*
 		 * Issue an empty draw call to avoid possible hangs due to
 		 * repeated idles without intervening draw calls.
@@ -1499,41 +1539,40 @@
 					| adreno_dev->pix_shader_start;
 	}
 
-	adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_PMODE,
-			&cmd[0], cmds - cmd);
+	return adreno_ringbuffer_issuecmds(device, context,
+			KGSL_CMD_FLAGS_PMODE, &cmd[0], cmds - cmd);
 }
 
-static void a2xx_drawctxt_save(struct adreno_device *adreno_dev,
+static int a2xx_drawctxt_save(struct adreno_device *adreno_dev,
 			struct adreno_context *context)
 {
 	struct kgsl_device *device = &adreno_dev->dev;
+	int ret;
 
-	if (context == NULL || (context->flags & CTXT_FLAGS_BEING_DESTROYED))
-		return;
-
-	if (context->flags & CTXT_FLAGS_GPU_HANG)
-		KGSL_CTXT_WARN(device,
-			"Current active context has caused gpu hang\n");
-
-	if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
+	if (!(context->base.flags & KGSL_CONTEXT_PREAMBLE)) {
 		kgsl_cffdump_syncmem(context->base.device, &context->gpustate,
 			context->reg_save[1],
 			context->reg_save[2] << 2, true);
 		/* save registers and constants. */
-		adreno_ringbuffer_issuecmds(device, context,
+		ret = adreno_ringbuffer_issuecmds(device, context,
 			KGSL_CMD_FLAGS_NONE,
 			context->reg_save, 3);
 
-		if (context->flags & CTXT_FLAGS_SHADER_SAVE) {
+		if (ret)
+			return ret;
+
+		if (test_bit(ADRENO_CONTEXT_SHADER_SAVE, &context->priv)) {
 			kgsl_cffdump_syncmem(context->base.device,
 				&context->gpustate,
 				context->shader_save[1],
 				context->shader_save[2] << 2, true);
 			/* save shader partitioning and instructions. */
-			adreno_ringbuffer_issuecmds(device, context,
+			ret = adreno_ringbuffer_issuecmds(device, context,
 				KGSL_CMD_FLAGS_PMODE,
 				context->shader_save, 3);
 
+			if (ret)
+				return ret;
 			kgsl_cffdump_syncmem(context->base.device,
 				&context->gpustate,
 				context->shader_fixup[1],
@@ -1542,132 +1581,130 @@
 			 * fixup shader partitioning parameter for
 			 *  SET_SHADER_BASES.
 			 */
-			adreno_ringbuffer_issuecmds(device, context,
+			ret = adreno_ringbuffer_issuecmds(device, context,
 				KGSL_CMD_FLAGS_NONE,
 				context->shader_fixup, 3);
 
-			context->flags |= CTXT_FLAGS_SHADER_RESTORE;
+			if (ret)
+				return ret;
+
+			set_bit(ADRENO_CONTEXT_SHADER_RESTORE, &context->priv);
 		}
 	}
 
-	if ((context->flags & CTXT_FLAGS_GMEM_SAVE) &&
-	    (context->flags & CTXT_FLAGS_GMEM_SHADOW)) {
+	if (test_bit(ADRENO_CONTEXT_GMEM_SAVE, &context->priv)) {
 		kgsl_cffdump_syncmem(context->base.device, &context->gpustate,
 			context->context_gmem_shadow.gmem_save[1],
 			context->context_gmem_shadow.gmem_save[2] << 2, true);
 		/* save gmem.
 		 * (note: changes shader. shader must already be saved.)
 		 */
-		adreno_ringbuffer_issuecmds(device, context,
+		ret = adreno_ringbuffer_issuecmds(device, context,
 			KGSL_CMD_FLAGS_PMODE,
 			context->context_gmem_shadow.gmem_save, 3);
 
+		if (ret)
+			return ret;
+
 		kgsl_cffdump_syncmem(context->base.device, &context->gpustate,
 			context->chicken_restore[1],
 			context->chicken_restore[2] << 2, true);
 
 		/* Restore TP0_CHICKEN */
-		if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
-			adreno_ringbuffer_issuecmds(device, context,
+		if (!(context->base.flags & KGSL_CONTEXT_PREAMBLE)) {
+			ret = adreno_ringbuffer_issuecmds(device, context,
 				KGSL_CMD_FLAGS_NONE,
 				context->chicken_restore, 3);
+
+			if (ret)
+				return ret;
 		}
 		adreno_dev->gpudev->ctx_switches_since_last_draw = 0;
 
-		context->flags |= CTXT_FLAGS_GMEM_RESTORE;
+		set_bit(ADRENO_CONTEXT_GMEM_RESTORE, &context->priv);
 	} else if (adreno_is_a2xx(adreno_dev))
-		a2xx_drawctxt_draw_workaround(adreno_dev, context);
+		return a2xx_drawctxt_draw_workaround(adreno_dev, context);
+
+	return 0;
 }
 
-static void a2xx_drawctxt_restore(struct adreno_device *adreno_dev,
+static int a2xx_drawctxt_restore(struct adreno_device *adreno_dev,
 			struct adreno_context *context)
 {
 	struct kgsl_device *device = &adreno_dev->dev;
-	unsigned int cmds[5];
+	int ret;
 
-	if (context == NULL) {
-		/* No context - set the default pagetable and thats it */
-		unsigned int id;
-		/*
-		 * If there isn't a current context, the kgsl_mmu_setstate
-		 * will use the CPU path so we don't need to give
-		 * it a valid context id.
-		 */
-		id = (adreno_dev->drawctxt_active != NULL)
-			? adreno_dev->drawctxt_active->base.id
-			: KGSL_CONTEXT_INVALID;
-		kgsl_mmu_setstate(&device->mmu, device->mmu.defaultpagetable,
-				  id);
-		return;
-	}
+	ret = adreno_context_restore(adreno_dev, context);
+	if (ret)
+		return ret;
 
-	cmds[0] = cp_nop_packet(1);
-	cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER;
-	cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2);
-	cmds[3] = device->memstore.gpuaddr +
-		KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context);
-	cmds[4] = context->base.id;
-	adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_NONE,
-					cmds, 5);
-	kgsl_mmu_setstate(&device->mmu, context->base.pagetable,
-			context->base.id);
-
-	/* restore gmem.
+	/*
+	 *  restore gmem.
 	 *  (note: changes shader. shader must not already be restored.)
 	 */
-	if (context->flags & CTXT_FLAGS_GMEM_RESTORE) {
+	if (test_bit(ADRENO_CONTEXT_GMEM_RESTORE, &context->priv)) {
 		kgsl_cffdump_syncmem(context->base.device, &context->gpustate,
 			context->context_gmem_shadow.gmem_restore[1],
 			context->context_gmem_shadow.gmem_restore[2] << 2,
 			true);
 
-		adreno_ringbuffer_issuecmds(device, context,
+		ret = adreno_ringbuffer_issuecmds(device, context,
 			KGSL_CMD_FLAGS_PMODE,
 			context->context_gmem_shadow.gmem_restore, 3);
+		if (ret)
+			return ret;
 
-		if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
+		if (!(context->base.flags & KGSL_CONTEXT_PREAMBLE)) {
 			kgsl_cffdump_syncmem(context->base.device,
 				&context->gpustate,
 				context->chicken_restore[1],
 				context->chicken_restore[2] << 2, true);
 
 			/* Restore TP0_CHICKEN */
-			adreno_ringbuffer_issuecmds(device, context,
+			ret = adreno_ringbuffer_issuecmds(device, context,
 				KGSL_CMD_FLAGS_NONE,
 				context->chicken_restore, 3);
+			if (ret)
+				return ret;
 		}
-
-		context->flags &= ~CTXT_FLAGS_GMEM_RESTORE;
+		clear_bit(ADRENO_CONTEXT_GMEM_RESTORE, &context->priv);
 	}
 
-	if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
+	if (!(context->base.flags & KGSL_CONTEXT_PREAMBLE)) {
 		kgsl_cffdump_syncmem(context->base.device, &context->gpustate,
 			context->reg_restore[1],
 			context->reg_restore[2] << 2, true);
 
 		/* restore registers and constants. */
-		adreno_ringbuffer_issuecmds(device, context,
+		ret = adreno_ringbuffer_issuecmds(device, context,
 			KGSL_CMD_FLAGS_NONE, context->reg_restore, 3);
+		if (ret)
+			return ret;
 
 		/* restore shader instructions & partitioning. */
-		if (context->flags & CTXT_FLAGS_SHADER_RESTORE) {
+		if (test_bit(ADRENO_CONTEXT_SHADER_RESTORE, &context->priv)) {
 			kgsl_cffdump_syncmem(context->base.device,
 				&context->gpustate,
 				context->shader_restore[1],
 				context->shader_restore[2] << 2, true);
 
-			adreno_ringbuffer_issuecmds(device, context,
+			ret = adreno_ringbuffer_issuecmds(device, context,
 				KGSL_CMD_FLAGS_NONE,
 				context->shader_restore, 3);
+			if (ret)
+				return ret;
 		}
 	}
 
 	if (adreno_is_a20x(adreno_dev)) {
+		unsigned int cmds[2];
 		cmds[0] = cp_type3_packet(CP_SET_BIN_BASE_OFFSET, 1);
 		cmds[1] = context->bin_base_offset;
-		adreno_ringbuffer_issuecmds(device, context,
+		ret = adreno_ringbuffer_issuecmds(device, context,
 			KGSL_CMD_FLAGS_NONE, cmds, 2);
 	}
+
+	return ret;
 }
 
 /*
@@ -1734,13 +1771,14 @@
 
 	if (!status) {
 		if (master_status & MASTER_INT_SIGNAL__CP_INT_STAT) {
-			/* This indicates that we could not read CP_INT_STAT.
-			 * As a precaution just wake up processes so
-			 * they can check their timestamps. Since, we
-			 * did not ack any interrupts this interrupt will
-			 * be generated again */
+			/*
+			 * This indicates that we could not read CP_INT_STAT.
+			 * As a precaution schedule the dispatcher to check
+			 * things out. Since we did not ack any interrupts this
+			 * interrupt will be generated again
+			 */
 			KGSL_DRV_WARN(device, "Unable to read CP_INT_STATUS\n");
-			wake_up_interruptible_all(&device->wait_queue);
+			adreno_dispatcher_schedule(device);
 		} else
 			KGSL_DRV_WARN(device, "Spurious interrupt detected\n");
 		return;
@@ -1766,7 +1804,7 @@
 
 	if (status & (CP_INT_CNTL__IB1_INT_MASK | CP_INT_CNTL__RB_INT_MASK)) {
 		queue_work(device->work_queue, &device->ts_expired_ws);
-		wake_up_interruptible_all(&device->wait_queue);
+		adreno_dispatcher_schedule(device);
 	}
 }
 
@@ -2050,6 +2088,8 @@
 	kgsl_regwrite(device, REG_SQ_INT_CNTL, 0);
 
 	a2xx_gmeminit(adreno_dev);
+
+	kgsl_regwrite(device, REG_CP_DEBUG, A2XX_CP_DEBUG_DEFAULT);
 }
 
 static void a2xx_postmortem_dump(struct adreno_device *adreno_dev)
@@ -2263,9 +2303,6 @@
 	.reg_offsets = &a2xx_reg_offsets,
 
 	.ctxt_create = a2xx_drawctxt_create,
-	.ctxt_save = a2xx_drawctxt_save,
-	.ctxt_restore = a2xx_drawctxt_restore,
-	.ctxt_draw_workaround = a2xx_drawctxt_draw_workaround,
 	.irq_handler = a2xx_irq_handler,
 	.irq_control = a2xx_irq_control,
 	.irq_pending = a2xx_irq_pending,
diff --git a/drivers/gpu/msm/adreno_a3xx.c b/drivers/gpu/msm/adreno_a3xx.c
index f9110ea..df1794f 100644
--- a/drivers/gpu/msm/adreno_a3xx.c
+++ b/drivers/gpu/msm/adreno_a3xx.c
@@ -2297,8 +2297,6 @@
 static int a3xx_create_gpustate_shadow(struct adreno_device *adreno_dev,
 				     struct adreno_context *drawctxt)
 {
-	drawctxt->flags |= CTXT_FLAGS_STATE_SHADOW;
-
 	build_regrestore_cmds(adreno_dev, drawctxt);
 	build_constantrestore_cmds(adreno_dev, drawctxt);
 	build_hlsqcontrol_restore_cmds(adreno_dev, drawctxt);
@@ -2321,7 +2319,8 @@
 	tmp_ctx.gmem_base = adreno_dev->gmem_base;
 
 	result = kgsl_allocate(&drawctxt->context_gmem_shadow.gmemshadow,
-		drawctxt->base.pagetable, drawctxt->context_gmem_shadow.size);
+		drawctxt->base.proc_priv->pagetable,
+		drawctxt->context_gmem_shadow.size);
 
 	if (result)
 		return result;
@@ -2337,25 +2336,46 @@
 	kgsl_cache_range_op(&drawctxt->context_gmem_shadow.gmemshadow,
 		KGSL_CACHE_OP_FLUSH);
 
-	drawctxt->flags |= CTXT_FLAGS_GMEM_SHADOW;
-
 	return 0;
 }
 
+static void a3xx_drawctxt_detach(struct adreno_context *drawctxt)
+{
+	kgsl_sharedmem_free(&drawctxt->gpustate);
+	kgsl_sharedmem_free(&drawctxt->context_gmem_shadow.gmemshadow);
+}
+
+static int a3xx_drawctxt_save(struct adreno_device *adreno_dev,
+			   struct adreno_context *context);
+
+static int a3xx_drawctxt_restore(struct adreno_device *adreno_dev,
+			      struct adreno_context *context);
+
+static const struct adreno_context_ops a3xx_legacy_ctx_ops = {
+	.save = a3xx_drawctxt_save,
+	.restore = a3xx_drawctxt_restore,
+	.detach = a3xx_drawctxt_detach,
+};
+
+
 static int a3xx_drawctxt_create(struct adreno_device *adreno_dev,
 	struct adreno_context *drawctxt)
 {
 	int ret;
 
 	/*
-	 * Allocate memory for the GPU state and the context commands.
-	 * Despite the name, this is much more then just storage for
-	 * the gpustate.  This contains command space for gmem save
-	 * and texture and vertex buffer storage too
+	 * Nothing to do here if the context is using preambles and doesn't need
+	 * GMEM save/restore
 	 */
+	if ((drawctxt->base.flags & KGSL_CONTEXT_PREAMBLE) &&
+		(drawctxt->base.flags & KGSL_CONTEXT_NO_GMEM_ALLOC)) {
+		drawctxt->ops = &adreno_preamble_ctx_ops;
+		return 0;
+	}
+	drawctxt->ops = &a3xx_legacy_ctx_ops;
 
 	ret = kgsl_allocate(&drawctxt->gpustate,
-		drawctxt->base.pagetable, CONTEXT_SIZE);
+		drawctxt->base.proc_priv->pagetable, CONTEXT_SIZE);
 
 	if (ret)
 		return ret;
@@ -2364,15 +2384,15 @@
 			CONTEXT_SIZE);
 	tmp_ctx.cmd = drawctxt->gpustate.hostptr + CMD_OFFSET;
 
-	if (!(drawctxt->flags & CTXT_FLAGS_PREAMBLE)) {
+	if (!(drawctxt->base.flags & KGSL_CONTEXT_PREAMBLE)) {
 		ret = a3xx_create_gpustate_shadow(adreno_dev, drawctxt);
 		if (ret)
 			goto done;
 
-		drawctxt->flags |= CTXT_FLAGS_SHADER_SAVE;
+		set_bit(ADRENO_CONTEXT_SHADER_SAVE, &drawctxt->priv);
 	}
 
-	if (!(drawctxt->flags & CTXT_FLAGS_NOGMEMALLOC))
+	if (!(drawctxt->base.flags & KGSL_CONTEXT_NO_GMEM_ALLOC))
 		ret = a3xx_create_gmem_shadow(adreno_dev, drawctxt);
 
 done:
@@ -2382,39 +2402,40 @@
 	return ret;
 }
 
-static void a3xx_drawctxt_save(struct adreno_device *adreno_dev,
+static int a3xx_drawctxt_save(struct adreno_device *adreno_dev,
 			   struct adreno_context *context)
 {
 	struct kgsl_device *device = &adreno_dev->dev;
+	int ret;
 
-	if (context == NULL || (context->flags & CTXT_FLAGS_BEING_DESTROYED))
-		return;
+	if (context->state == ADRENO_CONTEXT_STATE_INVALID)
+		return 0;
 
-	if (context->flags & CTXT_FLAGS_GPU_HANG)
-		KGSL_CTXT_WARN(device,
-			       "Current active context has caused gpu hang\n");
-
-	if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
+	if (!(context->base.flags & KGSL_CONTEXT_PREAMBLE)) {
 		/* Fixup self modifying IBs for save operations */
-		adreno_ringbuffer_issuecmds(device, context,
+		ret = adreno_ringbuffer_issuecmds(device, context,
 			KGSL_CMD_FLAGS_NONE, context->save_fixup, 3);
+		if (ret)
+			return ret;
 
 		/* save registers and constants. */
-		adreno_ringbuffer_issuecmds(device, context,
+		ret = adreno_ringbuffer_issuecmds(device, context,
 			KGSL_CMD_FLAGS_NONE,
 			context->regconstant_save, 3);
+		if (ret)
+			return ret;
 
-		if (context->flags & CTXT_FLAGS_SHADER_SAVE) {
+		if (test_bit(ADRENO_CONTEXT_SHADER_SAVE, &context->priv)) {
 			/* Save shader instructions */
-			adreno_ringbuffer_issuecmds(device, context,
+			ret = adreno_ringbuffer_issuecmds(device, context,
 				KGSL_CMD_FLAGS_PMODE, context->shader_save, 3);
-
-			context->flags |= CTXT_FLAGS_SHADER_RESTORE;
+			if (ret)
+				return ret;
+			set_bit(ADRENO_CONTEXT_SHADER_RESTORE, &context->priv);
 		}
 	}
 
-	if ((context->flags & CTXT_FLAGS_GMEM_SAVE) &&
-	    (context->flags & CTXT_FLAGS_GMEM_SHADOW)) {
+	if (test_bit(ADRENO_CONTEXT_GMEM_SAVE, &context->priv)) {
 		/*
 		 * Save GMEM (note: changes shader. shader must
 		 * already be saved.)
@@ -2425,89 +2446,84 @@
 			context->context_gmem_shadow.gmem_save[1],
 			context->context_gmem_shadow.gmem_save[2] << 2, true);
 
-		adreno_ringbuffer_issuecmds(device, context,
+		ret = adreno_ringbuffer_issuecmds(device, context,
 					KGSL_CMD_FLAGS_PMODE,
 					    context->context_gmem_shadow.
 					    gmem_save, 3);
-		context->flags |= CTXT_FLAGS_GMEM_RESTORE;
+		if (ret)
+			return ret;
+
+		set_bit(ADRENO_CONTEXT_GMEM_RESTORE, &context->priv);
 	}
+
+	return 0;
 }
 
-static void a3xx_drawctxt_restore(struct adreno_device *adreno_dev,
+static int a3xx_drawctxt_restore(struct adreno_device *adreno_dev,
 			      struct adreno_context *context)
 {
 	struct kgsl_device *device = &adreno_dev->dev;
-	unsigned int cmds[5];
+	int ret;
 
-	if (context == NULL) {
-		/* No context - set the default pagetable and thats it */
-		unsigned int id;
-		/*
-		 * If there isn't a current context, the kgsl_mmu_setstate
-		 * will use the CPU path so we don't need to give
-		 * it a valid context id.
-		 */
-		id = (adreno_dev->drawctxt_active != NULL)
-			? adreno_dev->drawctxt_active->base.id
-			: KGSL_CONTEXT_INVALID;
-		kgsl_mmu_setstate(&device->mmu, device->mmu.defaultpagetable,
-				  id);
-		return;
-	}
-
-	cmds[0] = cp_nop_packet(1);
-	cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER;
-	cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2);
-	cmds[3] = device->memstore.gpuaddr +
-		KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context);
-	cmds[4] = context->base.id;
-	adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_NONE,
-					cmds, 5);
-	kgsl_mmu_setstate(&device->mmu, context->base.pagetable,
-			context->base.id);
+	/* do the common part */
+	ret = adreno_context_restore(adreno_dev, context);
+	if (ret)
+		return ret;
 
 	/*
 	 * Restore GMEM.  (note: changes shader.
 	 * Shader must not already be restored.)
 	 */
 
-	if (context->flags & CTXT_FLAGS_GMEM_RESTORE) {
+	if (test_bit(ADRENO_CONTEXT_GMEM_RESTORE, &context->priv)) {
 		kgsl_cffdump_syncmem(context->base.device,
 			&context->gpustate,
 			context->context_gmem_shadow.gmem_restore[1],
 			context->context_gmem_shadow.gmem_restore[2] << 2,
 			true);
 
-		adreno_ringbuffer_issuecmds(device, context,
+		ret = adreno_ringbuffer_issuecmds(device, context,
 					KGSL_CMD_FLAGS_PMODE,
 					    context->context_gmem_shadow.
 					    gmem_restore, 3);
-		context->flags &= ~CTXT_FLAGS_GMEM_RESTORE;
+		if (ret)
+			return ret;
+		clear_bit(ADRENO_CONTEXT_GMEM_RESTORE, &context->priv);
 	}
 
-	if (!(context->flags & CTXT_FLAGS_PREAMBLE)) {
-		adreno_ringbuffer_issuecmds(device, context,
+	if (!(context->base.flags & KGSL_CONTEXT_PREAMBLE)) {
+		ret = adreno_ringbuffer_issuecmds(device, context,
 			KGSL_CMD_FLAGS_NONE, context->reg_restore, 3);
+		if (ret)
+			return ret;
 
 		/* Fixup self modifying IBs for restore operations */
-		adreno_ringbuffer_issuecmds(device, context,
+		ret = adreno_ringbuffer_issuecmds(device, context,
 			KGSL_CMD_FLAGS_NONE,
 			context->restore_fixup, 3);
+		if (ret)
+			return ret;
 
-		adreno_ringbuffer_issuecmds(device, context,
+		ret = adreno_ringbuffer_issuecmds(device, context,
 			KGSL_CMD_FLAGS_NONE,
 			context->constant_restore, 3);
+		if (ret)
+			return ret;
 
-		if (context->flags & CTXT_FLAGS_SHADER_RESTORE)
-			adreno_ringbuffer_issuecmds(device, context,
+		if (test_bit(ADRENO_CONTEXT_SHADER_RESTORE, &context->priv)) {
+			ret = adreno_ringbuffer_issuecmds(device, context,
 				KGSL_CMD_FLAGS_NONE,
 				context->shader_restore, 3);
-
+			if (ret)
+				return ret;
+		}
 		/* Restore HLSQ_CONTROL_0 register */
-		adreno_ringbuffer_issuecmds(device, context,
+		ret = adreno_ringbuffer_issuecmds(device, context,
 			KGSL_CMD_FLAGS_NONE,
 			context->hlsqcontrol_restore, 3);
 	}
+
+	return ret;
 }
 
 static const unsigned int _a3xx_pwron_fixup_fs_instructions[] = {
@@ -3042,7 +3058,7 @@
 
 		/* Clear the error */
 		kgsl_regwrite(device, A3XX_RBBM_AHB_CMD, (1 << 3));
-		return;
+		goto done;
 	}
 	case A3XX_INT_RBBM_REG_TIMEOUT:
 		err = "RBBM: AHB register timeout";
@@ -3083,170 +3099,62 @@
 	case A3XX_INT_UCHE_OOB_ACCESS:
 		err = "UCHE:  Out of bounds access";
 		break;
+	default:
+		return;
 	}
-
 	KGSL_DRV_CRIT(device, "%s\n", err);
 	kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF);
+
+done:
+	/* Trigger a fault in the dispatcher - this will effect a restart */
+	adreno_dispatcher_irq_fault(device);
 }
 
 static void a3xx_cp_callback(struct adreno_device *adreno_dev, int irq)
 {
 	struct kgsl_device *device = &adreno_dev->dev;
 
-	/* Wake up everybody waiting for the interrupt */
-	wake_up_interruptible_all(&device->wait_queue);
-
-	/* Schedule work to free mem and issue ibs */
+	device->pwrctrl.irq_last = 1;
 	queue_work(device->work_queue, &device->ts_expired_ws);
+	adreno_dispatcher_schedule(device);
 }
 
-/**
- * struct a3xx_perfcounter_register - Define a performance counter register
- * @load_bit: the bit to set in RBBM_LOAD_CMD0/RBBM_LOAD_CMD1 to force the RBBM
- * to load the reset value into the appropriate counter
- * @select: The dword offset of the register to write the selected
- * countable into
- */
 
-struct a3xx_perfcounter_register {
-	unsigned int load_bit;
-	unsigned int select;
-};
-
-static struct a3xx_perfcounter_register a3xx_perfcounter_reg_cp[] = {
-	{ 0, A3XX_CP_PERFCOUNTER_SELECT },
-};
-
-static struct a3xx_perfcounter_register a3xx_perfcounter_reg_rbbm[] = {
-	{ 1, A3XX_RBBM_PERFCOUNTER0_SELECT },
-	{ 2, A3XX_RBBM_PERFCOUNTER1_SELECT },
-};
-
-static struct a3xx_perfcounter_register a3xx_perfcounter_reg_pc[] = {
-	{ 3, A3XX_PC_PERFCOUNTER0_SELECT },
-	{ 4, A3XX_PC_PERFCOUNTER1_SELECT },
-	{ 5, A3XX_PC_PERFCOUNTER2_SELECT },
-	{ 6, A3XX_PC_PERFCOUNTER3_SELECT },
-};
-
-static struct a3xx_perfcounter_register a3xx_perfcounter_reg_vfd[] = {
-	{ 7, A3XX_VFD_PERFCOUNTER0_SELECT },
-	{ 8, A3XX_VFD_PERFCOUNTER1_SELECT },
-};
-
-static struct a3xx_perfcounter_register a3xx_perfcounter_reg_hlsq[] = {
-	{ 9, A3XX_HLSQ_PERFCOUNTER0_SELECT },
-	{ 10, A3XX_HLSQ_PERFCOUNTER1_SELECT },
-	{ 11, A3XX_HLSQ_PERFCOUNTER2_SELECT },
-	{ 12, A3XX_HLSQ_PERFCOUNTER3_SELECT },
-	{ 13, A3XX_HLSQ_PERFCOUNTER4_SELECT },
-	{ 14, A3XX_HLSQ_PERFCOUNTER5_SELECT },
-};
-
-static struct a3xx_perfcounter_register a3xx_perfcounter_reg_vpc[] = {
-	{ 15, A3XX_VPC_PERFCOUNTER0_SELECT },
-	{ 16, A3XX_VPC_PERFCOUNTER1_SELECT },
-};
-
-static struct a3xx_perfcounter_register a3xx_perfcounter_reg_tse[] = {
-	{ 17, A3XX_GRAS_PERFCOUNTER0_SELECT },
-	{ 18, A3XX_GRAS_PERFCOUNTER1_SELECT },
-};
-
-static struct a3xx_perfcounter_register a3xx_perfcounter_reg_ras[] = {
-	{ 19, A3XX_GRAS_PERFCOUNTER2_SELECT },
-	{ 20, A3XX_GRAS_PERFCOUNTER3_SELECT },
-};
-
-static struct a3xx_perfcounter_register a3xx_perfcounter_reg_uche[] = {
-	{ 21, A3XX_UCHE_PERFCOUNTER0_SELECT },
-	{ 22, A3XX_UCHE_PERFCOUNTER1_SELECT },
-	{ 23, A3XX_UCHE_PERFCOUNTER2_SELECT },
-	{ 24, A3XX_UCHE_PERFCOUNTER3_SELECT },
-	{ 25, A3XX_UCHE_PERFCOUNTER4_SELECT },
-	{ 26, A3XX_UCHE_PERFCOUNTER5_SELECT },
-};
-
-static struct a3xx_perfcounter_register a3xx_perfcounter_reg_tp[] = {
-	{ 27, A3XX_TP_PERFCOUNTER0_SELECT },
-	{ 28, A3XX_TP_PERFCOUNTER1_SELECT },
-	{ 29, A3XX_TP_PERFCOUNTER2_SELECT },
-	{ 30, A3XX_TP_PERFCOUNTER3_SELECT },
-	{ 31, A3XX_TP_PERFCOUNTER4_SELECT },
-	{ 32, A3XX_TP_PERFCOUNTER5_SELECT },
-};
-
-static struct a3xx_perfcounter_register a3xx_perfcounter_reg_sp[] = {
-	{ 33, A3XX_SP_PERFCOUNTER0_SELECT },
-	{ 34, A3XX_SP_PERFCOUNTER1_SELECT },
-	{ 35, A3XX_SP_PERFCOUNTER2_SELECT },
-	{ 36, A3XX_SP_PERFCOUNTER3_SELECT },
-	{ 37, A3XX_SP_PERFCOUNTER4_SELECT },
-	{ 38, A3XX_SP_PERFCOUNTER5_SELECT },
-	{ 39, A3XX_SP_PERFCOUNTER6_SELECT },
-	{ 40, A3XX_SP_PERFCOUNTER7_SELECT },
-};
-
-static struct a3xx_perfcounter_register a3xx_perfcounter_reg_rb[] = {
-	{ 41, A3XX_RB_PERFCOUNTER0_SELECT },
-	{ 42, A3XX_RB_PERFCOUNTER1_SELECT },
-};
-
-#define REGCOUNTER_GROUP(_x) { (_x), ARRAY_SIZE((_x)) }
-
-static struct {
-	struct a3xx_perfcounter_register *regs;
-	int count;
-} a3xx_perfcounter_reglist[] = {
-	REGCOUNTER_GROUP(a3xx_perfcounter_reg_cp),
-	REGCOUNTER_GROUP(a3xx_perfcounter_reg_rbbm),
-	REGCOUNTER_GROUP(a3xx_perfcounter_reg_pc),
-	REGCOUNTER_GROUP(a3xx_perfcounter_reg_vfd),
-	REGCOUNTER_GROUP(a3xx_perfcounter_reg_hlsq),
-	REGCOUNTER_GROUP(a3xx_perfcounter_reg_vpc),
-	REGCOUNTER_GROUP(a3xx_perfcounter_reg_tse),
-	REGCOUNTER_GROUP(a3xx_perfcounter_reg_ras),
-	REGCOUNTER_GROUP(a3xx_perfcounter_reg_uche),
-	REGCOUNTER_GROUP(a3xx_perfcounter_reg_tp),
-	REGCOUNTER_GROUP(a3xx_perfcounter_reg_sp),
-	REGCOUNTER_GROUP(a3xx_perfcounter_reg_rb),
-};
-
-static void a3xx_perfcounter_enable_pwr(struct kgsl_device *device,
-	unsigned int countable)
+static int a3xx_perfcounter_enable_pwr(struct kgsl_device *device,
+	unsigned int counter)
 {
 	unsigned int in, out;
 
-	if (countable > 1)
-		return;
+	if (counter > 1)
+		return -EINVAL;
 
 	kgsl_regread(device, A3XX_RBBM_RBBM_CTL, &in);
 
-	if (countable == 0)
+	if (counter == 0)
 		out = in | RBBM_RBBM_CTL_RESET_PWR_CTR0;
 	else
 		out = in | RBBM_RBBM_CTL_RESET_PWR_CTR1;
 
 	kgsl_regwrite(device, A3XX_RBBM_RBBM_CTL, out);
 
-	if (countable == 0)
+	if (counter == 0)
 		out = in | RBBM_RBBM_CTL_ENABLE_PWR_CTR0;
 	else
 		out = in | RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
 
 	kgsl_regwrite(device, A3XX_RBBM_RBBM_CTL, out);
 
-	return;
+	return 0;
 }
 
-static void a3xx_perfcounter_enable_vbif(struct kgsl_device *device,
+static int a3xx_perfcounter_enable_vbif(struct kgsl_device *device,
 					 unsigned int counter,
 					 unsigned int countable)
 {
 	unsigned int in, out, bit, sel;
 
 	if (counter > 1 || countable > 0x7f)
-		return;
+		return -EINVAL;
 
 	kgsl_regread(device, A3XX_VBIF_PERF_CNT_EN, &in);
 	kgsl_regread(device, A3XX_VBIF_PERF_CNT_SEL, &sel);
@@ -3268,20 +3176,21 @@
 	kgsl_regwrite(device, A3XX_VBIF_PERF_CNT_CLR, 0);
 
 	kgsl_regwrite(device, A3XX_VBIF_PERF_CNT_EN, out);
+	return 0;
 }
 
-static void a3xx_perfcounter_enable_vbif_pwr(struct kgsl_device *device,
-					     unsigned int countable)
+static int a3xx_perfcounter_enable_vbif_pwr(struct kgsl_device *device,
+					     unsigned int counter)
 {
 	unsigned int in, out, bit;
 
-	if (countable > 2)
-		return;
+	if (counter > 2)
+		return -EINVAL;
 
 	kgsl_regread(device, A3XX_VBIF_PERF_CNT_EN, &in);
-	if (countable == 0)
+	if (counter == 0)
 		bit = VBIF_PERF_PWR_CNT_0;
-	else if (countable == 1)
+	else if (counter == 1)
 		bit = VBIF_PERF_PWR_CNT_1;
 	else
 		bit = VBIF_PERF_PWR_CNT_2;
@@ -3292,6 +3201,7 @@
 	kgsl_regwrite(device, A3XX_VBIF_PERF_CNT_CLR, 0);
 
 	kgsl_regwrite(device, A3XX_VBIF_PERF_CNT_EN, out);
+	return 0;
 }
 
 /*
@@ -3302,73 +3212,167 @@
  * @countable - Desired countable
  *
  * Physically set up a counter within a group with the desired countable
+ * Return 0 on success else error code
  */
 
-static void a3xx_perfcounter_enable(struct adreno_device *adreno_dev,
+static int a3xx_perfcounter_enable(struct adreno_device *adreno_dev,
 	unsigned int group, unsigned int counter, unsigned int countable)
 {
 	struct kgsl_device *device = &adreno_dev->dev;
-	unsigned int val = 0;
-	struct a3xx_perfcounter_register *reg;
+	struct adreno_perfcount_register *reg;
 
 	/* Special cases */
 	if (group == KGSL_PERFCOUNTER_GROUP_PWR)
-		return a3xx_perfcounter_enable_pwr(device, countable);
+		return a3xx_perfcounter_enable_pwr(device, counter);
 	else if (group == KGSL_PERFCOUNTER_GROUP_VBIF)
-		return a3xx_perfcounter_enable_vbif(device, counter, countable);
+		return a3xx_perfcounter_enable_vbif(device, counter,
+							countable);
 	else if (group == KGSL_PERFCOUNTER_GROUP_VBIF_PWR)
-		return a3xx_perfcounter_enable_vbif_pwr(device, countable);
+		return a3xx_perfcounter_enable_vbif_pwr(device, counter);
 
-	if (group >= ARRAY_SIZE(a3xx_perfcounter_reglist))
-		return;
+	if (group >= adreno_dev->gpudev->perfcounters->group_count)
+		return -EINVAL;
 
-	if (counter >= a3xx_perfcounter_reglist[group].count)
-		return;
+	if ((0 == adreno_dev->gpudev->perfcounters->groups[group].reg_count) ||
+		(counter >=
+		adreno_dev->gpudev->perfcounters->groups[group].reg_count))
+		return -EINVAL;
 
-	reg = &(a3xx_perfcounter_reglist[group].regs[counter]);
+	reg = &(adreno_dev->gpudev->perfcounters->groups[group].regs[counter]);
 
 	/* Select the desired perfcounter */
 	kgsl_regwrite(device, reg->select, countable);
 
-	if (reg->load_bit < 32) {
-		val = 1 << reg->load_bit;
-		kgsl_regwrite(device, A3XX_RBBM_PERFCTR_LOAD_CMD0, val);
-	} else {
-		val  = 1 << (reg->load_bit - 32);
-		kgsl_regwrite(device, A3XX_RBBM_PERFCTR_LOAD_CMD1, val);
-	}
+	return 0;
+}
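+
+/*
+ * Note: the PWR, VBIF and VBIF_PWR groups are special-cased above because
+ * they sit outside the standard select-register scheme - their counters are
+ * enabled by toggling dedicated control bits instead of programming a
+ * countable into a per-counter select register.
+ */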
+
+static uint64_t a3xx_perfcounter_read_pwr(struct adreno_device *adreno_dev,
+				unsigned int counter)
+{
+	struct kgsl_device *device = &adreno_dev->dev;
+	struct adreno_perfcounters *counters = adreno_dev->gpudev->perfcounters;
+	struct adreno_perfcount_register *reg;
+	unsigned int in, out, lo = 0, hi = 0;
+	unsigned int enable_bit;
+
+	if (counter > 1)
+		return 0;
+	if (0 == counter)
+		enable_bit = RBBM_RBBM_CTL_ENABLE_PWR_CTR0;
+	else
+		enable_bit = RBBM_RBBM_CTL_ENABLE_PWR_CTR1;
+	/* freeze counter */
+	adreno_readreg(adreno_dev, ADRENO_REG_RBBM_RBBM_CTL, &in);
+	out = (in & ~enable_bit);
+	adreno_writereg(adreno_dev, ADRENO_REG_RBBM_RBBM_CTL, out);
+
+	reg = &counters->groups[KGSL_PERFCOUNTER_GROUP_PWR].regs[counter];
+	kgsl_regread(device, reg->offset, &lo);
+	kgsl_regread(device, reg->offset + 1, &hi);
+
+	/* restore the counter control value */
+	adreno_writereg(adreno_dev, ADRENO_REG_RBBM_RBBM_CTL, in);
+
+	return (((uint64_t) hi) << 32) | lo;
+}
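+
+/*
+ * Note: the freeze/read/restore sequence above (repeated in the VBIF
+ * variants below) keeps the counter from advancing between the LO and HI
+ * dword reads; without it a carry from LO into HI mid-read could return a
+ * value that is off by 2^32.
+ */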
+
+static uint64_t a3xx_perfcounter_read_vbif(struct adreno_device *adreno_dev,
+				unsigned int counter)
+{
+	struct adreno_perfcounters *counters = adreno_dev->gpudev->perfcounters;
+	struct kgsl_device *device = &adreno_dev->dev;
+	struct adreno_perfcount_register *reg;
+	unsigned int in, out, lo = 0, hi = 0;
+
+	if (counter > 1)
+		return 0;
+
+	/* freeze counter */
+	kgsl_regread(device, A3XX_VBIF_PERF_CNT_EN, &in);
+	if (counter == 0)
+		out = (in & ~VBIF_PERF_CNT_0);
+	else
+		out = (in & ~VBIF_PERF_CNT_1);
+	kgsl_regwrite(device, A3XX_VBIF_PERF_CNT_EN, out);
+
+	reg = &counters->groups[KGSL_PERFCOUNTER_GROUP_VBIF].regs[counter];
+	kgsl_regread(device, reg->offset, &lo);
+	kgsl_regread(device, reg->offset + 1, &hi);
+
+	/* restore the perfcounter value */
+	kgsl_regwrite(device, A3XX_VBIF_PERF_CNT_EN, in);
+
+	return (((uint64_t) hi) << 32) | lo;
+}
+
+static uint64_t a3xx_perfcounter_read_vbif_pwr(struct adreno_device *adreno_dev,
+				unsigned int counter)
+{
+	struct adreno_perfcounters *counters = adreno_dev->gpudev->perfcounters;
+	struct kgsl_device *device = &adreno_dev->dev;
+	struct adreno_perfcount_register *reg;
+	unsigned int in, out, lo = 0, hi = 0;
+
+	if (counter > 2)
+		return 0;
+
+	/* freeze counter */
+	kgsl_regread(device, A3XX_VBIF_PERF_CNT_EN, &in);
+	if (0 == counter)
+		out = (in & ~VBIF_PERF_PWR_CNT_0);
+	else
+		out = (in & ~VBIF_PERF_PWR_CNT_2);
+	kgsl_regwrite(device, A3XX_VBIF_PERF_CNT_EN, out);
+
+	reg = &counters->groups[KGSL_PERFCOUNTER_GROUP_VBIF_PWR].regs[counter];
+	kgsl_regread(device, reg->offset, &lo);
+	kgsl_regread(device, reg->offset + 1, &hi);
+	/* restore the perfcounter value */
+	kgsl_regwrite(device, A3XX_VBIF_PERF_CNT_EN, in);
+
+	return (((uint64_t) hi) << 32) | lo;
 }
 
 static uint64_t a3xx_perfcounter_read(struct adreno_device *adreno_dev,
-	unsigned int group, unsigned int counter,
-	unsigned int offset)
+	unsigned int group, unsigned int counter)
 {
 	struct kgsl_device *device = &adreno_dev->dev;
-	struct a3xx_perfcounter_register *reg = NULL;
+	struct adreno_perfcount_register *reg;
 	unsigned int lo = 0, hi = 0;
-	unsigned int val;
+	unsigned int offset;
+	unsigned int in, out;
 
-	if (group >= ARRAY_SIZE(a3xx_perfcounter_reglist))
+	if (group == KGSL_PERFCOUNTER_GROUP_VBIF_PWR)
+		return a3xx_perfcounter_read_vbif_pwr(adreno_dev, counter);
+
+	if (group == KGSL_PERFCOUNTER_GROUP_VBIF)
+		return a3xx_perfcounter_read_vbif(adreno_dev, counter);
+
+	if (group == KGSL_PERFCOUNTER_GROUP_PWR)
+		return a3xx_perfcounter_read_pwr(adreno_dev, counter);
+
+	if (group >= adreno_dev->gpudev->perfcounters->group_count)
 		return 0;
 
-	if (counter >= a3xx_perfcounter_reglist[group].count)
+	if ((0 == adreno_dev->gpudev->perfcounters->groups[group].reg_count) ||
+		(counter >=
+		adreno_dev->gpudev->perfcounters->groups[group].reg_count))
 		return 0;
 
-	reg = &(a3xx_perfcounter_reglist[group].regs[counter]);
+	reg = &(adreno_dev->gpudev->perfcounters->groups[group].regs[counter]);
 
 	/* Freeze the counter */
-	kgsl_regread(device, A3XX_RBBM_PERFCTR_CTL, &val);
-	val &= ~reg->load_bit;
-	kgsl_regwrite(device, A3XX_RBBM_PERFCTR_CTL, val);
+	kgsl_regread(device, A3XX_RBBM_PERFCTR_CTL, &in);
+	out = in & ~RBBM_PERFCTR_CTL_ENABLE;
+	kgsl_regwrite(device, A3XX_RBBM_PERFCTR_CTL, out);
 
+	offset = reg->offset;
 	/* Read the values */
 	kgsl_regread(device, offset, &lo);
 	kgsl_regread(device, offset + 1, &hi);
 
 	/* Re-Enable the counter */
-	val |= reg->load_bit;
-	kgsl_regwrite(device, A3XX_RBBM_PERFCTR_CTL, val);
-
+	kgsl_regwrite(device, A3XX_RBBM_PERFCTR_CTL, in);
 	return (((uint64_t) hi) << 32) | lo;
 }
 
@@ -3624,118 +3628,160 @@
  */
 
 static struct adreno_perfcount_register a3xx_perfcounters_cp[] = {
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_CP_0_LO },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_CP_0_LO,
+		0, A3XX_CP_PERFCOUNTER_SELECT },
 };
 
 static struct adreno_perfcount_register a3xx_perfcounters_rbbm[] = {
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RBBM_0_LO },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RBBM_1_LO },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RBBM_0_LO,
+		1, A3XX_RBBM_PERFCOUNTER0_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RBBM_1_LO,
+		2, A3XX_RBBM_PERFCOUNTER1_SELECT },
 };
 
 static struct adreno_perfcount_register a3xx_perfcounters_pc[] = {
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_0_LO },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_1_LO },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_2_LO },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_3_LO },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_0_LO,
+		3, A3XX_PC_PERFCOUNTER0_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_1_LO,
+		4, A3XX_PC_PERFCOUNTER1_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_2_LO,
+		5, A3XX_PC_PERFCOUNTER2_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_3_LO,
+		6, A3XX_PC_PERFCOUNTER3_SELECT },
 };
 
 static struct adreno_perfcount_register a3xx_perfcounters_vfd[] = {
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VFD_0_LO },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VFD_1_LO },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VFD_0_LO,
+		7, A3XX_VFD_PERFCOUNTER0_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VFD_1_LO,
+		8, A3XX_VFD_PERFCOUNTER1_SELECT },
 };
 
 static struct adreno_perfcount_register a3xx_perfcounters_hlsq[] = {
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_0_LO },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_1_LO },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_2_LO },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_3_LO },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_4_LO },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_5_LO },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_0_LO,
+		9, A3XX_HLSQ_PERFCOUNTER0_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_1_LO,
+		10, A3XX_HLSQ_PERFCOUNTER1_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_2_LO,
+		11, A3XX_HLSQ_PERFCOUNTER2_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_3_LO,
+		12, A3XX_HLSQ_PERFCOUNTER3_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_4_LO,
+		13, A3XX_HLSQ_PERFCOUNTER4_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_5_LO,
+		14, A3XX_HLSQ_PERFCOUNTER5_SELECT },
 };
 
 static struct adreno_perfcount_register a3xx_perfcounters_vpc[] = {
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VPC_0_LO },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VPC_1_LO },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VPC_0_LO,
+		15, A3XX_VPC_PERFCOUNTER0_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VPC_1_LO,
+		16, A3XX_VPC_PERFCOUNTER1_SELECT },
 };
 
 static struct adreno_perfcount_register a3xx_perfcounters_tse[] = {
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TSE_0_LO },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TSE_1_LO },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TSE_0_LO,
+		17, A3XX_GRAS_PERFCOUNTER0_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TSE_1_LO,
+		18, A3XX_GRAS_PERFCOUNTER1_SELECT },
 };
 
 static struct adreno_perfcount_register a3xx_perfcounters_ras[] = {
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RAS_0_LO },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RAS_1_LO },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RAS_0_LO,
+		19, A3XX_GRAS_PERFCOUNTER2_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RAS_1_LO,
+		20, A3XX_GRAS_PERFCOUNTER3_SELECT },
 };
 
 static struct adreno_perfcount_register a3xx_perfcounters_uche[] = {
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_0_LO },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_1_LO },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_2_LO },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_3_LO },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_4_LO },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_5_LO },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_0_LO,
+		21, A3XX_UCHE_PERFCOUNTER0_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_1_LO,
+		22, A3XX_UCHE_PERFCOUNTER1_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_2_LO,
+		23, A3XX_UCHE_PERFCOUNTER2_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_3_LO,
+		24, A3XX_UCHE_PERFCOUNTER3_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_4_LO,
+		25, A3XX_UCHE_PERFCOUNTER4_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_5_LO,
+		26, A3XX_UCHE_PERFCOUNTER5_SELECT },
 };
 
 static struct adreno_perfcount_register a3xx_perfcounters_tp[] = {
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_0_LO },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_1_LO },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_2_LO },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_3_LO },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_4_LO },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_5_LO },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_0_LO,
+		27, A3XX_TP_PERFCOUNTER0_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_1_LO,
+		28, A3XX_TP_PERFCOUNTER1_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_2_LO,
+		29, A3XX_TP_PERFCOUNTER2_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_3_LO,
+		30, A3XX_TP_PERFCOUNTER3_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_4_LO,
+		31, A3XX_TP_PERFCOUNTER4_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_5_LO,
+		32, A3XX_TP_PERFCOUNTER5_SELECT },
 };
 
 static struct adreno_perfcount_register a3xx_perfcounters_sp[] = {
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_0_LO },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_1_LO },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_2_LO },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_3_LO },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_4_LO },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_5_LO },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_6_LO },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_7_LO },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_0_LO,
+		33, A3XX_SP_PERFCOUNTER0_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_1_LO,
+		34, A3XX_SP_PERFCOUNTER1_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_2_LO,
+		35, A3XX_SP_PERFCOUNTER2_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_3_LO,
+		36, A3XX_SP_PERFCOUNTER3_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_4_LO,
+		37, A3XX_SP_PERFCOUNTER4_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_5_LO,
+		38, A3XX_SP_PERFCOUNTER5_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_6_LO,
+		39, A3XX_SP_PERFCOUNTER6_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_7_LO,
+		40, A3XX_SP_PERFCOUNTER7_SELECT },
 };
 
 static struct adreno_perfcount_register a3xx_perfcounters_rb[] = {
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RB_0_LO },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RB_1_LO },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RB_0_LO,
+		41, A3XX_RB_PERFCOUNTER0_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RB_1_LO,
+		42, A3XX_RB_PERFCOUNTER1_SELECT },
 };
 
 static struct adreno_perfcount_register a3xx_perfcounters_pwr[] = {
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PWR_0_LO },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PWR_1_LO },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PWR_0_LO,
+		-1, 0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PWR_1_LO,
+		-1, 0 },
 };
 
 static struct adreno_perfcount_register a3xx_perfcounters_vbif[] = {
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF_PERF_CNT0_LO },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF_PERF_CNT1_LO },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF_PERF_CNT0_LO, -1, 0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF_PERF_CNT1_LO, -1, 0 },
 };
 static struct adreno_perfcount_register a3xx_perfcounters_vbif_pwr[] = {
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF_PERF_PWR_CNT0_LO },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF_PERF_PWR_CNT1_LO },
-	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF_PERF_PWR_CNT2_LO },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF_PERF_PWR_CNT0_LO, -1, 0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF_PERF_PWR_CNT1_LO, -1, 0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF_PERF_PWR_CNT2_LO, -1, 0 },
 };
 
-#define A3XX_PERFCOUNTER_GROUP(name) { a3xx_perfcounters_##name, \
-	ARRAY_SIZE(a3xx_perfcounters_##name), __stringify(name) }
-
 static struct adreno_perfcount_group a3xx_perfcounter_groups[] = {
-	A3XX_PERFCOUNTER_GROUP(cp),
-	A3XX_PERFCOUNTER_GROUP(rbbm),
-	A3XX_PERFCOUNTER_GROUP(pc),
-	A3XX_PERFCOUNTER_GROUP(vfd),
-	A3XX_PERFCOUNTER_GROUP(hlsq),
-	A3XX_PERFCOUNTER_GROUP(vpc),
-	A3XX_PERFCOUNTER_GROUP(tse),
-	A3XX_PERFCOUNTER_GROUP(ras),
-	A3XX_PERFCOUNTER_GROUP(uche),
-	A3XX_PERFCOUNTER_GROUP(tp),
-	A3XX_PERFCOUNTER_GROUP(sp),
-	A3XX_PERFCOUNTER_GROUP(rb),
-	A3XX_PERFCOUNTER_GROUP(pwr),
-	A3XX_PERFCOUNTER_GROUP(vbif),
-	A3XX_PERFCOUNTER_GROUP(vbif_pwr),
+	ADRENO_PERFCOUNTER_GROUP(a3xx, cp),
+	ADRENO_PERFCOUNTER_GROUP(a3xx, rbbm),
+	ADRENO_PERFCOUNTER_GROUP(a3xx, pc),
+	ADRENO_PERFCOUNTER_GROUP(a3xx, vfd),
+	ADRENO_PERFCOUNTER_GROUP(a3xx, hlsq),
+	ADRENO_PERFCOUNTER_GROUP(a3xx, vpc),
+	ADRENO_PERFCOUNTER_GROUP(a3xx, tse),
+	ADRENO_PERFCOUNTER_GROUP(a3xx, ras),
+	ADRENO_PERFCOUNTER_GROUP(a3xx, uche),
+	ADRENO_PERFCOUNTER_GROUP(a3xx, tp),
+	ADRENO_PERFCOUNTER_GROUP(a3xx, sp),
+	ADRENO_PERFCOUNTER_GROUP(a3xx, rb),
+	ADRENO_PERFCOUNTER_GROUP(a3xx, pwr),
+	ADRENO_PERFCOUNTER_GROUP(a3xx, vbif),
+	ADRENO_PERFCOUNTER_GROUP(a3xx, vbif_pwr),
 };
 
 static struct adreno_perfcounters a3xx_perfcounters = {
@@ -3743,8 +3789,30 @@
 	ARRAY_SIZE(a3xx_perfcounter_groups),
 };
 
-static void a3xx_perfcounter_init(struct adreno_device *adreno_dev)
+/*
+ * a3xx_perfcounter_close() - Put back the counters that were reserved in
+ * a3xx_perfcounter_init
+ * @adreno_dev: The device for which counters were initialized
+ */
+static void a3xx_perfcounter_close(struct adreno_device *adreno_dev)
 {
+	adreno_perfcounter_put(adreno_dev, KGSL_PERFCOUNTER_GROUP_SP,
+		SP_FS_FULL_ALU_INSTRUCTIONS,
+		PERFCOUNTER_FLAG_KERNEL);
+	adreno_perfcounter_put(adreno_dev, KGSL_PERFCOUNTER_GROUP_SP,
+		SP_FS_CFLOW_INSTRUCTIONS,
+		PERFCOUNTER_FLAG_KERNEL);
+	adreno_perfcounter_put(adreno_dev, KGSL_PERFCOUNTER_GROUP_SP,
+		SP0_ICL1_MISSES,
+		PERFCOUNTER_FLAG_KERNEL);
+	adreno_perfcounter_put(adreno_dev, KGSL_PERFCOUNTER_GROUP_SP,
+		SP_ALU_ACTIVE_CYCLES,
+		PERFCOUNTER_FLAG_KERNEL);
+}
+
+static int a3xx_perfcounter_init(struct adreno_device *adreno_dev)
+{
+	int ret;
 	/* SP[3] counter is broken on a330 so disable it if a330 device */
 	if (adreno_is_a330(adreno_dev))
 		a3xx_perfcounters_sp[3].countable = KGSL_PERFCOUNTER_BROKEN;
@@ -3759,29 +3827,47 @@
 	 * we will use this to augment our hang detection
 	 */
 	if (adreno_dev->fast_hang_detect) {
-		adreno_perfcounter_get(adreno_dev, KGSL_PERFCOUNTER_GROUP_SP,
+		ret = adreno_perfcounter_get(adreno_dev,
+			KGSL_PERFCOUNTER_GROUP_SP,
 			SP_ALU_ACTIVE_CYCLES, &ft_detect_regs[6],
 			PERFCOUNTER_FLAG_KERNEL);
+		if (ret)
+			goto err;
 		ft_detect_regs[7] = ft_detect_regs[6] + 1;
-		adreno_perfcounter_get(adreno_dev, KGSL_PERFCOUNTER_GROUP_SP,
+		ret = adreno_perfcounter_get(adreno_dev,
+			KGSL_PERFCOUNTER_GROUP_SP,
 			SP0_ICL1_MISSES, &ft_detect_regs[8],
 			PERFCOUNTER_FLAG_KERNEL);
+		if (ret)
+			goto err;
 		ft_detect_regs[9] = ft_detect_regs[8] + 1;
-		adreno_perfcounter_get(adreno_dev, KGSL_PERFCOUNTER_GROUP_SP,
+		ret = adreno_perfcounter_get(adreno_dev,
+			KGSL_PERFCOUNTER_GROUP_SP,
 			SP_FS_CFLOW_INSTRUCTIONS, &ft_detect_regs[10],
 			PERFCOUNTER_FLAG_KERNEL);
+		if (ret)
+			goto err;
 		ft_detect_regs[11] = ft_detect_regs[10] + 1;
 	}
 
-	adreno_perfcounter_get(adreno_dev, KGSL_PERFCOUNTER_GROUP_SP,
+	ret = adreno_perfcounter_get(adreno_dev, KGSL_PERFCOUNTER_GROUP_SP,
 		SP_FS_FULL_ALU_INSTRUCTIONS, NULL, PERFCOUNTER_FLAG_KERNEL);
+	if (ret)
+		goto err;
 
 	/* Reserve and start countable 1 in the PWR perfcounter group */
-	adreno_perfcounter_get(adreno_dev, KGSL_PERFCOUNTER_GROUP_PWR, 1,
+	ret = adreno_perfcounter_get(adreno_dev, KGSL_PERFCOUNTER_GROUP_PWR, 1,
 			NULL, PERFCOUNTER_FLAG_KERNEL);
+	if (ret)
+		goto err;
 
 	/* Default performance counter profiling to false */
 	adreno_dev->profile.enabled = false;
+	return ret;
+
+err:
+	a3xx_perfcounter_close(adreno_dev);
+	return ret;
 }
 
 /**
@@ -3890,6 +3976,9 @@
 	/* Turn on the GPU busy counter and let it run free */
 
 	adreno_dev->gpu_cycles = 0;
+
+	/* the CP_DEBUG register offset and value are the same as on A2XX */
+	kgsl_regwrite(device, REG_CP_DEBUG, A2XX_CP_DEBUG_DEFAULT);
 }
 
 /**
@@ -4240,6 +4329,7 @@
 				REG_SQ_INST_STORE_MANAGMENT),
 	ADRENO_REG_DEFINE(ADRENO_REG_TC_CNTL_STATUS, REG_TC_CNTL_STATUS),
 	ADRENO_REG_DEFINE(ADRENO_REG_TP0_CHICKEN, REG_TP0_CHICKEN),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_RBBM_CTL, A3XX_RBBM_RBBM_CTL),
 };
 
 struct adreno_reg_offsets a3xx_reg_offsets = {
@@ -4252,11 +4342,9 @@
 	.perfcounters = &a3xx_perfcounters,
 
 	.ctxt_create = a3xx_drawctxt_create,
-	.ctxt_save = a3xx_drawctxt_save,
-	.ctxt_restore = a3xx_drawctxt_restore,
-	.ctxt_draw_workaround = NULL,
 	.rb_init = a3xx_rb_init,
 	.perfcounter_init = a3xx_perfcounter_init,
+	.perfcounter_close = a3xx_perfcounter_close,
 	.irq_control = a3xx_irq_control,
 	.irq_handler = a3xx_irq_handler,
 	.irq_pending = a3xx_irq_pending,
diff --git a/drivers/gpu/msm/adreno_dispatch.c b/drivers/gpu/msm/adreno_dispatch.c
new file mode 100644
index 0000000..2da36b6
--- /dev/null
+++ b/drivers/gpu/msm/adreno_dispatch.c
@@ -0,0 +1,1712 @@
+/* Copyright (c) 2013, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/wait.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/jiffies.h>
+#include <linux/err.h>
+
+#include "kgsl.h"
+#include "adreno.h"
+#include "adreno_ringbuffer.h"
+#include "adreno_trace.h"
+
+#define CMDQUEUE_NEXT(_i, _s) (((_i) + 1) % (_s))
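+
+/*
+ * Illustrative example: with a queue of size 4, CMDQUEUE_NEXT advances an
+ * index 0 -> 1 -> 2 -> 3 -> 0, wrapping back to the start so the head and
+ * tail indices can chase each other around the ring.
+ */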
+
+/* Number of commands that can be queued in a context before it sleeps */
+static unsigned int _context_cmdqueue_size = 50;
+
+/* Number of milliseconds to wait for the context queue to clear */
+static unsigned int _context_queue_wait = 10000;
+
+/* Number of command batches sent at a time from a single context */
+static unsigned int _context_cmdbatch_burst = 5;
+
+/* Number of command batches inflight in the ringbuffer at any time */
+static unsigned int _dispatcher_inflight = 15;
+
+/* Command batch timeout (in milliseconds) */
+static unsigned int _cmdbatch_timeout = 2000;
+
+/* Interval for reading and comparing fault detection registers */
+static unsigned int _fault_timer_interval = 50;
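+
+/*
+ * Taken together, the defaults above mean a context may queue up to 50
+ * command batches before the submitting thread sleeps (for at most 10
+ * seconds), each scheduling pass drains at most 5 batches per context, at
+ * most 15 batches sit in the ringbuffer at once, and a batch at the head of
+ * the pipeline is considered hung after 2 seconds.
+ */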
+
+/* Local array for the current set of fault detect registers */
+static unsigned int fault_detect_regs[FT_DETECT_REGS_COUNT];
+
+/* The last retired global timestamp read during fault detect */
+static unsigned int fault_detect_ts;
+
+/**
+ * fault_detect_read() - Read the set of fault detect registers
+ * @device: Pointer to the KGSL device struct
+ *
+ * Read the set of fault detect registers and store them in the local array.
+ * This is for the initial values that are compared later with
+ * fault_detect_read_compare
+ */
+static void fault_detect_read(struct kgsl_device *device)
+{
+	int i;
+
+	fault_detect_ts = kgsl_readtimestamp(device, NULL,
+		KGSL_TIMESTAMP_RETIRED);
+
+	for (i = 0; i < FT_DETECT_REGS_COUNT; i++) {
+		if (ft_detect_regs[i] == 0)
+			continue;
+		kgsl_regread(device, ft_detect_regs[i],
+			&fault_detect_regs[i]);
+	}
+}
+
+/*
+ * Check to see if the device is idle and that the global timestamp is up to
+ * date
+ */
+static inline bool _isidle(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	unsigned int ts;
+
+	ts = kgsl_readtimestamp(device, NULL, KGSL_TIMESTAMP_RETIRED);
+
+	if (adreno_isidle(device) == true &&
+		(ts >= adreno_dev->ringbuffer.global_ts))
+		return true;
+
+	return false;
+}
+
+/**
+ * fault_detect_read_compare() - Read the fault detect registers and compare
+ * them to the previously stored values
+ * @device: Pointer to the KGSL device struct
+ *
+ * Read the set of fault detect registers and compare them to the stored set
+ * of values.  Return 1 if any of the values changed, the retired timestamp
+ * advanced or the device went idle - all signs that the GPU is still making
+ * progress
+ */
+static int fault_detect_read_compare(struct kgsl_device *device)
+{
+	int i, ret = 0;
+	unsigned int ts;
+
+	/* Check to see if the device is idle - if so report no hang */
+	if (_isidle(device) == true)
+		ret = 1;
+
+	for (i = 0; i < FT_DETECT_REGS_COUNT; i++) {
+		unsigned int val;
+
+		if (ft_detect_regs[i] == 0)
+			continue;
+		kgsl_regread(device, ft_detect_regs[i], &val);
+		if (val != fault_detect_regs[i])
+			ret = 1;
+		fault_detect_regs[i] = val;
+	}
+
+	ts = kgsl_readtimestamp(device, NULL, KGSL_TIMESTAMP_RETIRED);
+	if (ts != fault_detect_ts)
+		ret = 1;
+
+	fault_detect_ts = ts;
+
+	return ret;
+}
+
+/**
+ * adreno_dispatcher_get_cmdbatch() - Get a new command from a context queue
+ * @drawctxt: Pointer to the adreno draw context
+ *
+ * Dequeue a new command batch from the context list
+ */
+static inline struct kgsl_cmdbatch *adreno_dispatcher_get_cmdbatch(
+		struct adreno_context *drawctxt)
+{
+	struct kgsl_cmdbatch *cmdbatch = NULL;
+
+	mutex_lock(&drawctxt->mutex);
+	if (drawctxt->cmdqueue_head != drawctxt->cmdqueue_tail) {
+		cmdbatch = drawctxt->cmdqueue[drawctxt->cmdqueue_head];
+
+		/*
+		 * Don't dequeue a cmdbatch that is still waiting for other
+		 * events
+		 */
+		if (kgsl_cmdbatch_sync_pending(cmdbatch)) {
+			cmdbatch = ERR_PTR(-EAGAIN);
+			goto done;
+		}
+
+		drawctxt->cmdqueue_head =
+			CMDQUEUE_NEXT(drawctxt->cmdqueue_head,
+			ADRENO_CONTEXT_CMDQUEUE_SIZE);
+		drawctxt->queued--;
+	}
+
+done:
+	mutex_unlock(&drawctxt->mutex);
+
+	return cmdbatch;
+}
+
+/**
+ * adreno_dispatcher_requeue_cmdbatch() - Put a command back on the context
+ * queue
+ * @drawctxt: Pointer to the adreno draw context
+ * @cmdbatch: Pointer to the KGSL cmdbatch to requeue
+ *
+ * Failure to submit a command to the ringbuffer isn't the fault of the command
+ * being submitted so if a failure happens, push it back on the head of the the
+ * context queue to be reconsidered again unless the context got detached.
+ */
+static inline int adreno_dispatcher_requeue_cmdbatch(
+		struct adreno_context *drawctxt, struct kgsl_cmdbatch *cmdbatch)
+{
+	unsigned int prev;
+	mutex_lock(&drawctxt->mutex);
+
+	if (kgsl_context_detached(&drawctxt->base) ||
+		drawctxt->state == ADRENO_CONTEXT_STATE_INVALID) {
+		mutex_unlock(&drawctxt->mutex);
+		/* get rid of this cmdbatch since the context is bad */
+		kgsl_cmdbatch_destroy(cmdbatch);
+		return -EINVAL;
+	}
+
+	prev = drawctxt->cmdqueue_head == 0 ?
+		(ADRENO_CONTEXT_CMDQUEUE_SIZE - 1) :
+		(drawctxt->cmdqueue_head - 1);
+
+	/*
+	 * The maximum queue size always needs to be one less than the size of
+	 * the ringbuffer queue so there is "room" to put the cmdbatch back in
+	 */
+
+	BUG_ON(prev == drawctxt->cmdqueue_tail);
+
+	drawctxt->cmdqueue[prev] = cmdbatch;
+	drawctxt->queued++;
+
+	/* Reset the command queue head to reflect the newly requeued change */
+	drawctxt->cmdqueue_head = prev;
+	mutex_unlock(&drawctxt->mutex);
+	return 0;
+}
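+
+/*
+ * Illustrative example: if cmdqueue_head is 0 the requeued batch lands in
+ * the last slot of the array (ADRENO_CONTEXT_CMDQUEUE_SIZE - 1); otherwise
+ * it lands in cmdqueue_head - 1.  Either way the head moves back onto it so
+ * it is the next batch dequeued.
+ */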
+
+/**
+ * dispatcher_queue_context() - Queue a context in the dispatcher pending list
+ * @adreno_dev: Pointer to the adreno device struct
+ * @drawctxt: Pointer to the adreno draw context
+ *
+ * Add a context to the dispatcher pending list.
+ */
+static void dispatcher_queue_context(struct adreno_device *adreno_dev,
+		struct adreno_context *drawctxt)
+{
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+
+	/* Refuse to queue a detached context */
+	if (kgsl_context_detached(&drawctxt->base))
+		return;
+
+	spin_lock(&dispatcher->plist_lock);
+
+	if (plist_node_empty(&drawctxt->pending)) {
+		/* Get a reference to the context while it sits on the list */
+		if (_kgsl_context_get(&drawctxt->base)) {
+			trace_dispatch_queue_context(drawctxt);
+			plist_add(&drawctxt->pending, &dispatcher->pending);
+		}
+	}
+
+	spin_unlock(&dispatcher->plist_lock);
+}
+
+/**
+ * sendcmd() - Send a command batch to the GPU hardware
+ * @adreno_dev: Pointer to the adreno device struct
+ * @cmdbatch: Pointer to the KGSL cmdbatch being sent
+ *
+ * Send a KGSL command batch to the GPU hardware
+ */
+static int sendcmd(struct adreno_device *adreno_dev,
+	struct kgsl_cmdbatch *cmdbatch)
+{
+	struct kgsl_device *device = &adreno_dev->dev;
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+	int ret;
+
+	dispatcher->inflight++;
+
+	mutex_lock(&device->mutex);
+
+	if (dispatcher->inflight == 1 &&
+			!test_bit(ADRENO_DISPATCHER_POWER, &dispatcher->priv)) {
+		/* Time to make the donuts.  Turn on the GPU */
+		ret = kgsl_active_count_get(device);
+		if (ret) {
+			dispatcher->inflight--;
+			mutex_unlock(&device->mutex);
+			return ret;
+		}
+
+		set_bit(ADRENO_DISPATCHER_POWER, &dispatcher->priv);
+	}
+
+	ret = adreno_ringbuffer_submitcmd(adreno_dev, cmdbatch);
+
+	/*
+	 * On the first command, if the submission was successful, then read the
+	 * fault registers.  If it failed then turn off the GPU. Sad face.
+	 */
+
+	if (dispatcher->inflight == 1) {
+		if (ret == 0)
+			fault_detect_read(device);
+		else {
+			kgsl_active_count_put(device);
+			clear_bit(ADRENO_DISPATCHER_POWER, &dispatcher->priv);
+		}
+	}
+
+	mutex_unlock(&device->mutex);
+
+	if (ret) {
+		dispatcher->inflight--;
+		KGSL_DRV_ERR(device,
+			"Unable to submit command to the ringbuffer %d\n", ret);
+		return ret;
+	}
+
+	trace_adreno_cmdbatch_submitted(cmdbatch, dispatcher->inflight);
+
+	dispatcher->cmdqueue[dispatcher->tail] = cmdbatch;
+	dispatcher->tail = (dispatcher->tail + 1) %
+		ADRENO_DISPATCH_CMDQUEUE_SIZE;
+
+	/*
+	 * If this is the first command in the pipe then the GPU will
+	 * immediately start executing it so we can start the expiry timeout on
+	 * the command batch here.  Subsequent command batches will have their
+	 * timer started when the previous command batch is retired
+	 */
+	if (dispatcher->inflight == 1) {
+		cmdbatch->expires = jiffies +
+			msecs_to_jiffies(_cmdbatch_timeout);
+		mod_timer(&dispatcher->timer, cmdbatch->expires);
+
+		/* Start the fault detection timer */
+		if (adreno_dev->fast_hang_detect)
+			mod_timer(&dispatcher->fault_timer,
+				jiffies +
+				msecs_to_jiffies(_fault_timer_interval));
+	}
+
+	return 0;
+}
+
+/**
+ * dispatcher_context_sendcmds() - Send commands from a context to the GPU
+ * @adreno_dev: Pointer to the adreno device struct
+ * @drawctxt: Pointer to the adreno context to dispatch commands from
+ *
+ * Dequeue and send a burst of commands from the specified context to the GPU
+ * Returns positive if the context needs to be put back on the pending queue
+ * 0 if the context is empty or detached and negative on error
+ */
+static int dispatcher_context_sendcmds(struct adreno_device *adreno_dev,
+		struct adreno_context *drawctxt)
+{
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+	int count = 0;
+	int requeued = 0;
+
+	/*
+	 * Each context can send a specific number of command batches per cycle
+	 */
+	while ((count < _context_cmdbatch_burst) &&
+		(dispatcher->inflight < _dispatcher_inflight)) {
+		int ret;
+		struct kgsl_cmdbatch *cmdbatch;
+
+		if (adreno_gpu_fault(adreno_dev) != 0)
+			break;
+
+		cmdbatch = adreno_dispatcher_get_cmdbatch(drawctxt);
+
+		/*
+		 * adreno_dispatcher_get_cmdbatch returns -EAGAIN if the current
+		 * cmdbatch has pending sync points so there is no more to do
+		 * here.  When the sync points are satisfied then the context
+		 * will get requeued
+		 */
+
+		if (IS_ERR_OR_NULL(cmdbatch)) {
+			if (IS_ERR(cmdbatch) && PTR_ERR(cmdbatch) == -EAGAIN)
+				requeued = 1;
+			break;
+		}
+
+		/*
+		 * If this is a synchronization submission then there are no
+		 * commands to submit.  Discard it and get the next item from
+		 * the queue.  Decrement count so this packet doesn't count
+		 * against the burst for the context
+		 */
+
+		if (cmdbatch->flags & KGSL_CONTEXT_SYNC) {
+			kgsl_cmdbatch_destroy(cmdbatch);
+			continue;
+		}
+
+		ret = sendcmd(adreno_dev, cmdbatch);
+
+		/*
+		 * There are various reasons why we can't submit a command (no
+		 * memory for the commands, full ringbuffer, etc) but none of
+		 * these are actually the current command's fault.  Requeue it
+		 * back on the context and let it come back around again if
+		 * conditions improve
+		 */
+		if (ret) {
+			requeued = adreno_dispatcher_requeue_cmdbatch(drawctxt,
+				cmdbatch) ? 0 : 1;
+			break;
+		}
+
+		count++;
+	}
+
+	/*
+	 * If the context successfully submitted commands there will be room
+	 * in the context queue so wake up any snoozing threads that want to
+	 * submit commands
+	 */
+
+	if (count)
+		wake_up_interruptible_all(&drawctxt->wq);
+
+	/*
+	 * Return positive if the context submitted commands or if we figured
+	 * out that we need to requeue due to a pending sync or error.
+	 */
+
+	return (count || requeued) ? 1 : 0;
+}
+
+/**
+ * _adreno_dispatcher_issuecmds() - Issue commands from pending contexts
+ * @adreno_dev: Pointer to the adreno device struct
+ *
+ * Issue as many commands as possible (up to inflight) from the pending contexts.
+ * This function assumes the dispatcher mutex has been locked.
+ */
+static int _adreno_dispatcher_issuecmds(struct adreno_device *adreno_dev)
+{
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+	struct adreno_context *drawctxt, *next;
+	struct plist_head requeue;
+	int ret;
+
+	/* Leave early if the dispatcher isn't in a happy state */
+	if (adreno_gpu_fault(adreno_dev) != 0)
+		return 0;
+
+	plist_head_init(&requeue);
+
+	/* Try to fill the ringbuffer as much as possible */
+	while (dispatcher->inflight < _dispatcher_inflight) {
+
+		/* Stop doing things if the dispatcher is paused or faulted */
+		if (adreno_gpu_fault(adreno_dev) != 0)
+			break;
+
+		spin_lock(&dispatcher->plist_lock);
+
+		if (plist_head_empty(&dispatcher->pending)) {
+			spin_unlock(&dispatcher->plist_lock);
+			break;
+		}
+
+		/* Get the next entry on the list */
+		drawctxt = plist_first_entry(&dispatcher->pending,
+			struct adreno_context, pending);
+
+		plist_del(&drawctxt->pending, &dispatcher->pending);
+
+		spin_unlock(&dispatcher->plist_lock);
+
+		if (kgsl_context_detached(&drawctxt->base) ||
+			drawctxt->state == ADRENO_CONTEXT_STATE_INVALID) {
+			kgsl_context_put(&drawctxt->base);
+			continue;
+		}
+
+		ret = dispatcher_context_sendcmds(adreno_dev, drawctxt);
+
+		if (ret > 0) {
+			spin_lock(&dispatcher->plist_lock);
+
+			/*
+			 * Check to see if the context had been requeued while
+			 * we were processing it (probably by another thread
+			 * pushing commands). If it has then we don't need to
+			 * bother with it but do a put to make sure the
+			 * reference counting stays accurate.  If the node is
+			 * empty then we will put it on the requeue list and not
+			 * touch the refcount since we already hold it from the
+			 * first time it went on the list.
+			 */
+
+			if (plist_node_empty(&drawctxt->pending))
+				plist_add(&drawctxt->pending, &requeue);
+			else
+				kgsl_context_put(&drawctxt->base);
+
+			spin_unlock(&dispatcher->plist_lock);
+		} else {
+			/*
+			 * If the context doesn't need be requeued put back the
+			 * refcount
+			 */
+
+			kgsl_context_put(&drawctxt->base);
+		}
+	}
+
+	/* Put all the requeued contexts back on the master list */
+
+	spin_lock(&dispatcher->plist_lock);
+
+	plist_for_each_entry_safe(drawctxt, next, &requeue, pending) {
+		plist_del(&drawctxt->pending, &requeue);
+		plist_add(&drawctxt->pending, &dispatcher->pending);
+	}
+
+	spin_unlock(&dispatcher->plist_lock);
+
+	return 0;
+}
+
+/**
+ * adreno_dispatcher_issuecmds() - Issue commands from pending contexts
+ * @adreno_dev: Pointer to the adreno device struct
+ *
+ * Lock the dispatcher and call _adreno_dispatcher_issuecmds()
+ */
+int adreno_dispatcher_issuecmds(struct adreno_device *adreno_dev)
+{
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+	int ret;
+
+	mutex_lock(&dispatcher->mutex);
+	ret = _adreno_dispatcher_issuecmds(adreno_dev);
+	mutex_unlock(&dispatcher->mutex);
+
+	return ret;
+}
+
+static int _check_context_queue(struct adreno_context *drawctxt)
+{
+	int ret;
+
+	mutex_lock(&drawctxt->mutex);
+
+	/*
+	 * Wake up if there is room in the context or if the whole thing got
+	 * invalidated while we were asleep
+	 */
+
+	if (drawctxt->state == ADRENO_CONTEXT_STATE_INVALID)
+		ret = 1;
+	else
+		ret = drawctxt->queued < _context_cmdqueue_size ? 1 : 0;
+
+	mutex_unlock(&drawctxt->mutex);
+
+	return ret;
+}
+
+/**
+ * get_timestamp() - Return the next timestamp for the context
+ * @drawctxt: Pointer to an adreno draw context struct
+ * @cmdbatch: Pointer to a command batch
+ * @timestamp: Pointer to a timestamp value possibly passed from the user
+ *
+ * Assign a timestamp based on the settings of the draw context and the command
+ * batch.
+ */
+static int get_timestamp(struct adreno_context *drawctxt,
+		struct kgsl_cmdbatch *cmdbatch, unsigned int *timestamp)
+{
+	/* Synchronization commands don't get a timestamp */
+	if (cmdbatch->flags & KGSL_CONTEXT_SYNC) {
+		*timestamp = 0;
+		return 0;
+	}
+
+	if (drawctxt->base.flags & KGSL_CONTEXT_USER_GENERATED_TS) {
+		/*
+		 * User specified timestamps need to be greater than the last
+		 * issued timestamp in the context
+		 */
+		if (timestamp_cmp(drawctxt->timestamp, *timestamp) >= 0)
+			return -ERANGE;
+
+		drawctxt->timestamp = *timestamp;
+	} else
+		drawctxt->timestamp++;
+
+	*timestamp = drawctxt->timestamp;
+	return 0;
+}
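+
+/*
+ * Illustrative example: if the last timestamp issued on a context with
+ * KGSL_CONTEXT_USER_GENERATED_TS was 100, a new user timestamp of 100 or
+ * less fails with -ERANGE (subject to the wrap-around handling in
+ * timestamp_cmp) while 101 is accepted and recorded as the context's
+ * current timestamp.
+ */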
+
+/**
+ * adreno_dispatcher_queue_cmd() - Queue a new command in the context
+ * @adreno_dev: Pointer to the adreno device struct
+ * @drawctxt: Pointer to the adreno draw context
+ * @cmdbatch: Pointer to the command batch being submitted
+ * @timestamp: Pointer to the requested timestamp
+ *
+ * Queue a command in the context - if there isn't any room in the queue, then
+ * block until there is
+ */
+int adreno_dispatcher_queue_cmd(struct adreno_device *adreno_dev,
+		struct adreno_context *drawctxt, struct kgsl_cmdbatch *cmdbatch,
+		uint32_t *timestamp)
+{
+	int ret;
+
+	mutex_lock(&drawctxt->mutex);
+
+	if (kgsl_context_detached(&drawctxt->base)) {
+		mutex_unlock(&drawctxt->mutex);
+		return -EINVAL;
+	}
+
+	/*
+	 * After skipping to the end of the frame we need to force the preamble
+	 * to run (if it exists) regardless of the context state.
+	 */
+
+	if (test_and_clear_bit(ADRENO_CONTEXT_FORCE_PREAMBLE, &drawctxt->priv))
+		set_bit(CMDBATCH_FLAG_FORCE_PREAMBLE, &cmdbatch->priv);
+
+	/*
+	 * If we are waiting for the end of frame and it hasn't appeared yet,
+	 * then mark the command batch as skipped.  It will still progress
+	 * through the pipeline but it won't actually send any commands
+	 */
+
+	if (test_bit(ADRENO_CONTEXT_SKIP_EOF, &drawctxt->priv)) {
+		set_bit(CMDBATCH_FLAG_SKIP, &cmdbatch->priv);
+
+		/*
+		 * If this command batch represents the EOF then clear the way
+		 * for the dispatcher to continue submitting
+		 */
+
+		if (cmdbatch->flags & KGSL_CONTEXT_END_OF_FRAME) {
+			clear_bit(ADRENO_CONTEXT_SKIP_EOF, &drawctxt->priv);
+
+			/*
+			 * Force the preamble on the next command to ensure that
+			 * the state is correct
+			 */
+			set_bit(ADRENO_CONTEXT_FORCE_PREAMBLE, &drawctxt->priv);
+		}
+	}
+
+	/* Wait for room in the context queue */
+
+	while (drawctxt->queued >= _context_cmdqueue_size) {
+		trace_adreno_drawctxt_sleep(drawctxt);
+		mutex_unlock(&drawctxt->mutex);
+
+		ret = wait_event_interruptible_timeout(drawctxt->wq,
+			_check_context_queue(drawctxt),
+			msecs_to_jiffies(_context_queue_wait));
+
+		mutex_lock(&drawctxt->mutex);
+		trace_adreno_drawctxt_wake(drawctxt);
+
+		if (ret <= 0) {
+			mutex_unlock(&drawctxt->mutex);
+			return (ret == 0) ? -ETIMEDOUT : (int) ret;
+		}
+	}
+	/*
+	 * Account for the possibility that the context got invalidated
+	 * while we were sleeping
+	 */
+
+	if (drawctxt->state == ADRENO_CONTEXT_STATE_INVALID) {
+		mutex_unlock(&drawctxt->mutex);
+		return -EDEADLK;
+	}
+	if (kgsl_context_detached(&drawctxt->base)) {
+		mutex_unlock(&drawctxt->mutex);
+		return -EINVAL;
+	}
+
+	ret = get_timestamp(drawctxt, cmdbatch, timestamp);
+	if (ret) {
+		mutex_unlock(&drawctxt->mutex);
+		return ret;
+	}
+
+	cmdbatch->timestamp = *timestamp;
+
+	/*
+	 * Set the fault tolerance policy for the command batch - assuming the
+	 * context hasn't disabled FT use the current device policy
+	 */
+
+	if (drawctxt->base.flags & KGSL_CONTEXT_NO_FAULT_TOLERANCE)
+		set_bit(KGSL_FT_DISABLE, &cmdbatch->fault_policy);
+	else
+		cmdbatch->fault_policy = adreno_dev->ft_policy;
+
+	/* Put the command into the queue */
+	drawctxt->cmdqueue[drawctxt->cmdqueue_tail] = cmdbatch;
+	drawctxt->cmdqueue_tail = (drawctxt->cmdqueue_tail + 1) %
+		ADRENO_CONTEXT_CMDQUEUE_SIZE;
+
+	drawctxt->queued++;
+	trace_adreno_cmdbatch_queued(cmdbatch, drawctxt->queued);
+
+	mutex_unlock(&drawctxt->mutex);
+
+	/* Add the context to the dispatcher pending list */
+	dispatcher_queue_context(adreno_dev, drawctxt);
+
+	/*
+	 * Only issue commands if inflight is less than burst - this prevents
+	 * us from sitting around waiting for the mutex on a busy system - the
+	 * work loop will schedule it for us. Inflight is mutex protected but
+	 * the worst that can happen is that it will go to 0 after we check
+	 * and if it goes to 0 it is because the work loop decremented it and
+	 * the work queue will try to schedule new commands anyway.
+	 */
+
+	if (adreno_dev->dispatcher.inflight < _context_cmdbatch_burst)
+		adreno_dispatcher_issuecmds(adreno_dev);
+
+	return 0;
+}
+
+static int _mark_context(int id, void *ptr, void *data)
+{
+	unsigned int guilty = *((unsigned int *) data);
+	struct kgsl_context *context = ptr;
+
+	/*
+	 * If the context is guilty mark it as such.  Otherwise mark it as
+	 * innocent if it had not already been marked as guilty.  If id is
+	 * passed as 0 then mark EVERYBODY guilty (recovery failed)
+	 */
+
+	if (guilty == 0 || guilty == context->id)
+		context->reset_status =
+			KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT;
+	else if (context->reset_status !=
+		KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT)
+		context->reset_status =
+			KGSL_CTX_STAT_INNOCENT_CONTEXT_RESET_EXT;
+
+	return 0;
+}
+
+/**
+ * mark_guilty_context() - Mark the given context as guilty (failed recovery)
+ * @device: Pointer to a KGSL device structure
+ * @id: Context ID of the guilty context (or 0 to mark all as guilty)
+ *
+ * Mark the given (or all) context(s) as guilty (failed recovery)
+ */
+static void mark_guilty_context(struct kgsl_device *device, unsigned int id)
+{
+	/* Mark the status for all the contexts in the device */
+
+	read_lock(&device->context_lock);
+	idr_for_each(&device->context_idr, _mark_context, &id);
+	read_unlock(&device->context_lock);
+}
+
+/*
+ * If an IB inside of the command batch has a gpuaddr that matches the base
+ * passed in then zero the size which effectively skips it when it is submitted
+ * in the ringbuffer.
+ */
+static void cmdbatch_skip_ib(struct kgsl_cmdbatch *cmdbatch, unsigned int base)
+{
+	int i;
+
+	for (i = 0; i < cmdbatch->ibcount; i++) {
+		if (cmdbatch->ibdesc[i].gpuaddr == base) {
+			cmdbatch->ibdesc[i].sizedwords = 0;
+			if (base)
+				return;
+		}
+	}
+}
+
+static void cmdbatch_skip_frame(struct kgsl_cmdbatch *cmdbatch,
+	struct kgsl_cmdbatch **replay, int count)
+{
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(cmdbatch->context);
+	int skip = 1;
+	int i;
+
+	for (i = 0; i < count; i++) {
+
+		/*
+		 * Only operate on command batches that belong to the
+		 * faulting context
+		 */
+
+		if (replay[i]->context->id != cmdbatch->context->id)
+			continue;
+
+		/*
+		 * Skip all the command batches in this context until
+		 * the EOF flag is seen.  If the EOF flag is seen then
+		 * force the preamble for the next command.
+		 */
+
+		if (skip) {
+			set_bit(CMDBATCH_FLAG_SKIP, &replay[i]->priv);
+
+			if (replay[i]->flags & KGSL_CONTEXT_END_OF_FRAME)
+				skip = 0;
+		} else {
+			set_bit(CMDBATCH_FLAG_FORCE_PREAMBLE, &replay[i]->priv);
+			return;
+		}
+	}
+
+	/*
+	 * If the EOF flag hasn't been seen yet then set the flag in the
+	 * drawctxt to keep looking for it
+	 */
+
+	if (skip && drawctxt)
+		set_bit(ADRENO_CONTEXT_SKIP_EOF, &drawctxt->priv);
+
+	/*
+	 * If we did see the EOF flag then force the preamble on for the
+	 * next command issued on this context
+	 */
+
+	if (!skip && drawctxt)
+		set_bit(ADRENO_CONTEXT_FORCE_PREAMBLE, &drawctxt->priv);
+}
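+
+/*
+ * Illustrative example: if the faulting context has batches A, B and C in
+ * the replay list and B carries KGSL_CONTEXT_END_OF_FRAME, then A and B are
+ * marked to be skipped and C gets a forced preamble so rendering resumes
+ * from a known state.
+ */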
+
+static void remove_invalidated_cmdbatches(struct kgsl_device *device,
+		struct kgsl_cmdbatch **replay, int count)
+{
+	int i;
+
+	for (i = 0; i < count; i++) {
+		struct kgsl_cmdbatch *cmd = replay[i];
+		struct adreno_context *drawctxt;
+
+		if (cmd == NULL)
+			continue;
+
+		drawctxt = ADRENO_CONTEXT(cmd->context);
+
+		if (kgsl_context_detached(cmd->context) ||
+			drawctxt->state == ADRENO_CONTEXT_STATE_INVALID) {
+			replay[i] = NULL;
+
+			mutex_lock(&device->mutex);
+			kgsl_cancel_events_timestamp(device, cmd->context,
+				cmd->timestamp);
+			mutex_unlock(&device->mutex);
+
+			kgsl_cmdbatch_destroy(cmd);
+		}
+	}
+}
+
+static char _pidname[TASK_COMM_LEN];
+
+static inline const char *_kgsl_context_comm(struct kgsl_context *context)
+{
+	struct task_struct *task = NULL;
+
+	if (context)
+		task = find_task_by_vpid(context->pid);
+
+	if (task)
+		get_task_comm(_pidname, task);
+	else
+		snprintf(_pidname, TASK_COMM_LEN, "unknown");
+
+	return _pidname;
+}
+
+#define pr_fault(_d, _c, fmt, args...) \
+		dev_err((_d)->dev, "%s[%d]: " fmt, \
+		_kgsl_context_comm((_c)->context), \
+		(_c)->context->pid, ##args)
+
+
+static void adreno_fault_header(struct kgsl_device *device,
+	struct kgsl_cmdbatch *cmdbatch)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	unsigned int status, base, rptr, wptr, ib1base, ib2base, ib1sz, ib2sz;
+
+	kgsl_regread(device,
+			adreno_getreg(adreno_dev, ADRENO_REG_RBBM_STATUS),
+			&status);
+	kgsl_regread(device,
+		adreno_getreg(adreno_dev, ADRENO_REG_CP_RB_BASE),
+		&base);
+	kgsl_regread(device,
+		adreno_getreg(adreno_dev, ADRENO_REG_CP_RB_RPTR),
+		&rptr);
+	kgsl_regread(device,
+		adreno_getreg(adreno_dev, ADRENO_REG_CP_RB_WPTR),
+		&wptr);
+	kgsl_regread(device,
+		adreno_getreg(adreno_dev, ADRENO_REG_CP_IB1_BASE),
+		&ib1base);
+	kgsl_regread(device,
+		adreno_getreg(adreno_dev, ADRENO_REG_CP_IB1_BUFSZ),
+		&ib1sz);
+	kgsl_regread(device,
+		adreno_getreg(adreno_dev, ADRENO_REG_CP_IB2_BASE),
+		&ib2base);
+	kgsl_regread(device,
+		adreno_getreg(adreno_dev, ADRENO_REG_CP_IB2_BUFSZ),
+		&ib2sz);
+
+	trace_adreno_gpu_fault(cmdbatch->context->id, cmdbatch->timestamp,
+		status, rptr, wptr, ib1base, ib1sz, ib2base, ib2sz);
+
+	pr_fault(device, cmdbatch,
+		"gpu fault ctx %d ts %d status %8.8X rb %4.4x/%4.4x ib1 %8.8x/%4.4x ib2 %8.8x/%4.4x\n",
+		cmdbatch->context->id, cmdbatch->timestamp, status,
+		rptr, wptr, ib1base, ib1sz, ib2base, ib2sz);
+}
+
+static int dispatcher_do_fault(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+	unsigned int ptr;
+	unsigned int reg, base;
+	struct kgsl_cmdbatch **replay = NULL;
+	struct kgsl_cmdbatch *cmdbatch;
+	int ret, i, count = 0;
+	int fault, first = 0;
+	bool pagefault = false;
+
+	fault = atomic_xchg(&dispatcher->fault, 0);
+	if (fault == 0)
+		return 0;
+	/*
+	 * Return early if no commands are inflight - this can happen on
+	 * a false hang detect
+	 */
+	if (dispatcher->inflight == 0) {
+		KGSL_DRV_WARN(device,
+		"dispatcher_do_fault with 0 inflight commands\n");
+		return 0;
+	}
+
+	/* Turn off all the timers */
+	del_timer_sync(&dispatcher->timer);
+	del_timer_sync(&dispatcher->fault_timer);
+
+	mutex_lock(&device->mutex);
+
+	cmdbatch = dispatcher->cmdqueue[dispatcher->head];
+
+	trace_adreno_cmdbatch_fault(cmdbatch, fault);
+
+	/*
+	 * If the fault was due to a timeout then stop the CP to ensure we don't
+	 * get activity while we are trying to dump the state of the system
+	 */
+
+	if (fault & ADRENO_TIMEOUT_FAULT) {
+		adreno_readreg(adreno_dev, ADRENO_REG_CP_ME_CNTL, &reg);
+		reg |= (1 << 27) | (1 << 28);
+		adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_CNTL, reg);
+
+		/* Skip the PM dump for a timeout because it confuses people */
+		set_bit(KGSL_FT_SKIP_PMDUMP, &cmdbatch->fault_policy);
+	}
+
+	adreno_readreg(adreno_dev, ADRENO_REG_CP_IB1_BASE, &base);
+
+	/*
+	 * Dump the postmortem and snapshot information if this is the first
+	 * detected fault for the oldest active command batch
+	 */
+
+	if (!test_bit(KGSL_FT_SKIP_PMDUMP, &cmdbatch->fault_policy)) {
+		adreno_fault_header(device, cmdbatch);
+
+		if (device->pm_dump_enable)
+			kgsl_postmortem_dump(device, 0);
+
+		kgsl_device_snapshot(device, 1);
+	}
+
+	mutex_unlock(&device->mutex);
+
+	/* Allocate memory to store the inflight commands */
+	replay = kzalloc(sizeof(*replay) * dispatcher->inflight, GFP_KERNEL);
+
+	if (replay == NULL) {
+		unsigned int ptr = dispatcher->head;
+
+		/* Recovery failed - mark everybody guilty */
+		mark_guilty_context(device, 0);
+
+		while (ptr != dispatcher->tail) {
+			struct kgsl_context *context =
+				dispatcher->cmdqueue[ptr]->context;
+
+			adreno_drawctxt_invalidate(device, context);
+			kgsl_cmdbatch_destroy(dispatcher->cmdqueue[ptr]);
+
+			ptr = CMDQUEUE_NEXT(ptr, ADRENO_DISPATCH_CMDQUEUE_SIZE);
+		}
+
+		/*
+		 * Set the replay count to zero - this will ensure that the
+		 * hardware gets reset but nothing else gets replayed
+		 */
+
+		count = 0;
+		goto replay;
+	}
+
+	/* Copy the inflight command batches into the temporary storage */
+	ptr = dispatcher->head;
+
+	while (ptr != dispatcher->tail) {
+		replay[count++] = dispatcher->cmdqueue[ptr];
+		ptr = CMDQUEUE_NEXT(ptr, ADRENO_DISPATCH_CMDQUEUE_SIZE);
+	}
+
+	/*
+	 * For the purposes of replay, we assume that the oldest command batch
+	 * that hasn't retired a timestamp is "hung".
+	 */
+
+	cmdbatch = replay[0];
+
+	/*
+	 * If FT is disabled for this cmdbatch invalidate immediately
+	 */
+
+	if (test_bit(KGSL_FT_DISABLE, &cmdbatch->fault_policy) ||
+		test_bit(KGSL_FT_TEMP_DISABLE, &cmdbatch->fault_policy)) {
+		pr_fault(device, cmdbatch, "gpu skipped ctx %d ts %d\n",
+			cmdbatch->context->id, cmdbatch->timestamp);
+
+		mark_guilty_context(device, cmdbatch->context->id);
+		adreno_drawctxt_invalidate(device, cmdbatch->context);
+	}
+
+	/*
+	 * Set a flag so we don't print another PM dump if the cmdbatch fails
+	 * again on replay
+	 */
+
+	set_bit(KGSL_FT_SKIP_PMDUMP, &cmdbatch->fault_policy);
+
+	/*
+	 * A hardware fault generally means something was deterministically
+	 * wrong with the command batch - no point in trying to replay it
+	 * Clear the replay bit and move on to the next policy level
+	 */
+
+	if (fault & ADRENO_HARD_FAULT)
+		clear_bit(KGSL_FT_REPLAY, &(cmdbatch->fault_policy));
+
+	/*
+	 * A timeout fault means the IB timed out - clear the policy and
+	 * invalidate - this will clear the FT_SKIP_PMDUMP bit but that is okay
+	 * because we won't see this cmdbatch again
+	 */
+
+	if (fault & ADRENO_TIMEOUT_FAULT)
+		bitmap_zero(&cmdbatch->fault_policy, BITS_PER_LONG);
+
+	/*
+	 * If the context had a GPU page fault then it is likely it would fault
+	 * again if replayed
+	 */
+
+	if (test_bit(KGSL_CONTEXT_PAGEFAULT, &cmdbatch->context->priv)) {
+		/* we'll need to resume the mmu later... */
+		pagefault = true;
+		clear_bit(KGSL_FT_REPLAY, &cmdbatch->fault_policy);
+		clear_bit(KGSL_CONTEXT_PAGEFAULT, &cmdbatch->context->priv);
+	}
+
+	/*
+	 * Execute the fault tolerance policy. Each command batch stores the
+	 * current fault policy that was set when it was queued.
+	 * As the options are tried in descending priority
+	 * (REPLAY -> SKIPIBS -> SKIPFRAME -> NOTHING) the bits are cleared
+	 * from the cmdbatch policy so the next option can be tried if the
+	 * batch faults again
+	 */
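+
+	/*
+	 * Worked example (assuming every policy bit was set when the batch
+	 * was queued): the first fault replays the batch as-is, the second
+	 * zeroes the offending IB1 and replays, the third skips the rest of
+	 * the frame, and any further fault invalidates the context and marks
+	 * it guilty.
+	 */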
+
+	/* Replay the hanging command batch again */
+	if (test_and_clear_bit(KGSL_FT_REPLAY, &cmdbatch->fault_policy)) {
+		trace_adreno_cmdbatch_recovery(cmdbatch, BIT(KGSL_FT_REPLAY));
+		set_bit(KGSL_FT_REPLAY, &cmdbatch->fault_recovery);
+		goto replay;
+	}
+
+	/*
+	 * Skip the last IB1 that was played but replay everything else.
+	 * Note that the last IB1 might not be in the "hung" command batch
+	 * because the CP may have caused a page-fault while it was prefetching
+	 * the next IB1/IB2. Walk all outstanding commands and zap the
+	 * supposedly bad IB1 wherever it lurks.
+	 */
+
+	if (test_and_clear_bit(KGSL_FT_SKIPIB, &cmdbatch->fault_policy)) {
+		trace_adreno_cmdbatch_recovery(cmdbatch, BIT(KGSL_FT_SKIPIB));
+		set_bit(KGSL_FT_SKIPIB, &cmdbatch->fault_recovery);
+
+		for (i = 0; i < count; i++) {
+			if (replay[i] != NULL &&
+				replay[i]->context->id == cmdbatch->context->id)
+				cmdbatch_skip_ib(replay[i], base);
+		}
+
+		goto replay;
+	}
+
+	if (test_and_clear_bit(KGSL_FT_SKIPFRAME, &cmdbatch->fault_policy)) {
+		trace_adreno_cmdbatch_recovery(cmdbatch,
+			BIT(KGSL_FT_SKIPFRAME));
+		set_bit(KGSL_FT_SKIPFRAME, &cmdbatch->fault_recovery);
+
+		/*
+		 * Skip all the pending command batches for this context until
+		 * the EOF frame is seen
+		 */
+		cmdbatch_skip_frame(cmdbatch, replay, count);
+		goto replay;
+	}
+
+	/* If we get here then all the policies failed */
+
+	pr_fault(device, cmdbatch, "gpu failed ctx %d ts %d\n",
+		cmdbatch->context->id, cmdbatch->timestamp);
+
+	/* Mark the context as failed */
+	mark_guilty_context(device, cmdbatch->context->id);
+
+	/* Invalidate the context */
+	adreno_drawctxt_invalidate(device, cmdbatch->context);
+
+replay:
+	/* Reset the dispatcher queue */
+	dispatcher->inflight = 0;
+	dispatcher->head = dispatcher->tail = 0;
+
+	/* Reset the GPU */
+	mutex_lock(&device->mutex);
+
+	ret = adreno_reset(device);
+	mutex_unlock(&device->mutex);
+	/* if any other fault got in until reset then ignore */
+	fault = atomic_xchg(&dispatcher->fault, 0);
+
+	/* If adreno_reset() fails then what hope do we have for the future? */
+	BUG_ON(ret);
+
+	/* Remove any pending command batches that have been invalidated */
+	remove_invalidated_cmdbatches(device, replay, count);
+
+	/* Replay the pending command buffers */
+	for (i = 0; i < count; i++) {
+
+		int ret;
+
+		if (replay[i] == NULL)
+			continue;
+
+		/*
+		 * Force the preamble on the first command (if applicable) to
+		 * avoid any strange state issues
+		 */
+
+		if (first == 0) {
+			set_bit(CMDBATCH_FLAG_FORCE_PREAMBLE, &replay[i]->priv);
+			first = 1;
+		}
+
+		/*
+		 * Force each command batch to wait for idle - this avoids weird
+		 * CP parse issues
+		 */
+
+		set_bit(CMDBATCH_FLAG_WFI, &replay[i]->priv);
+
+		ret = sendcmd(adreno_dev, replay[i]);
+
+		/*
+		 * If sending the command fails, then try to recover by
+		 * invalidating the context
+		 */
+
+		if (ret) {
+			pr_fault(device, replay[i],
+				"gpu reset failed ctx %d ts %d\n",
+				replay[i]->context->id, replay[i]->timestamp);
+
+			/* Mark this context as guilty (failed recovery) */
+			mark_guilty_context(device, replay[i]->context->id);
+
+			adreno_drawctxt_invalidate(device, replay[i]->context);
+			remove_invalidated_cmdbatches(device, &replay[i],
+				count - i);
+		}
+	}
+
+	kfree(replay);
+
+	return 1;
+}
+
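+/*
+ * Return true if the cmdbatch timestamp falls at or after the last consumed
+ * timestamp but has not yet been retired
+ */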
+static inline int cmdbatch_consumed(struct kgsl_cmdbatch *cmdbatch,
+		unsigned int consumed, unsigned int retired)
+{
+	return ((timestamp_cmp(cmdbatch->timestamp, consumed) >= 0) &&
+		(timestamp_cmp(retired, cmdbatch->timestamp) < 0));
+}
+
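+/*
+ * Map the first recovery bit that was set to its human readable name and
+ * log which fault tolerance action finally succeeded for this cmdbatch
+ */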
+static void _print_recovery(struct kgsl_device *device,
+		struct kgsl_cmdbatch *cmdbatch)
+{
+	static struct {
+		unsigned int mask;
+		const char *str;
+	} flags[] = { ADRENO_FT_TYPES };
+
+	int i, nr = find_first_bit(&cmdbatch->fault_recovery, BITS_PER_LONG);
+	char *result = "unknown";
+
+	for (i = 0; i < ARRAY_SIZE(flags); i++) {
+		if (flags[i].mask == BIT(nr)) {
+			result = (char *) flags[i].str;
+			break;
+		}
+	}
+
+	pr_fault(device, cmdbatch,
+		"gpu %s ctx %d ts %d policy %lX\n",
+		result, cmdbatch->context->id, cmdbatch->timestamp,
+		cmdbatch->fault_recovery);
+}
+
+/**
+ * adreno_dispatcher_work() - Master work handler for the dispatcher
+ * @work: Pointer to the work struct for the current work queue
+ *
+ * Process expired commands and send new ones.
+ */
+static void adreno_dispatcher_work(struct work_struct *work)
+{
+	struct adreno_dispatcher *dispatcher =
+		container_of(work, struct adreno_dispatcher, work);
+	struct adreno_device *adreno_dev =
+		container_of(dispatcher, struct adreno_device, dispatcher);
+	struct kgsl_device *device = &adreno_dev->dev;
+	int count = 0;
+	int fault_handled = 0;
+
+	mutex_lock(&dispatcher->mutex);
+
+	while (dispatcher->head != dispatcher->tail) {
+		uint32_t consumed, retired = 0;
+		struct kgsl_cmdbatch *cmdbatch =
+			dispatcher->cmdqueue[dispatcher->head];
+		struct adreno_context *drawctxt;
+		BUG_ON(cmdbatch == NULL);
+
+		drawctxt = ADRENO_CONTEXT(cmdbatch->context);
+
+		/*
+		 * First try to expire the timestamp. This happens if the
+		 * context is valid and the timestamp expired normally or if the
+		 * context was destroyed before the command batch was finished
+		 * in the GPU.  Either way, retire the command batch, advance
+		 * the pointers and continue processing the queue
+		 */
+
+		retired = kgsl_readtimestamp(device, cmdbatch->context,
+				KGSL_TIMESTAMP_RETIRED);
+
+		if ((timestamp_cmp(cmdbatch->timestamp, retired) <= 0)) {
+
+			/*
+			 * If the cmdbatch in question had faulted announce its
+			 * successful completion to the world
+			 */
+
+			if (cmdbatch->fault_recovery != 0) {
+				struct adreno_context *drawctxt =
+					ADRENO_CONTEXT(cmdbatch->context);
+
+				/* Mark the context as faulted and recovered */
+				set_bit(ADRENO_CONTEXT_FAULT, &drawctxt->priv);
+
+				_print_recovery(device, cmdbatch);
+			}
+
+			trace_adreno_cmdbatch_retired(cmdbatch,
+				dispatcher->inflight - 1);
+
+			/* Reduce the number of inflight command batches */
+			dispatcher->inflight--;
+
+			/* Zero the old entry*/
+			dispatcher->cmdqueue[dispatcher->head] = NULL;
+
+			/* Advance the buffer head */
+			dispatcher->head = CMDQUEUE_NEXT(dispatcher->head,
+				ADRENO_DISPATCH_CMDQUEUE_SIZE);
+
+			/* Destroy the retired command batch */
+			kgsl_cmdbatch_destroy(cmdbatch);
+
+			/* Update the expire time for the next command batch */
+
+			if (dispatcher->inflight > 0) {
+				cmdbatch =
+					dispatcher->cmdqueue[dispatcher->head];
+				cmdbatch->expires = jiffies +
+					msecs_to_jiffies(_cmdbatch_timeout);
+			}
+
+			count++;
+			continue;
+		}
+
+		/*
+		 * If we got a fault from the interrupt handler, this command
+		 * is to blame.  Invalidate it, reset and replay
+		 */
+
+		if (dispatcher_do_fault(device))
+			goto done;
+		fault_handled = 1;
+
+		/* Get the last consumed timestamp */
+		consumed = kgsl_readtimestamp(device, cmdbatch->context,
+			KGSL_TIMESTAMP_CONSUMED);
+
+		/*
+		 * Break here if fault detection is disabled for the context or
+		 * if the long running IB detection is disabled device wide.
+		 * Long running command buffers will be allowed to run to
+		 * completion - but badly behaving command buffers (infinite
+		 * shaders etc) can end up running forever.
+		 */
+
+		if (!adreno_dev->long_ib_detect ||
+			drawctxt->base.flags & KGSL_CONTEXT_NO_FAULT_TOLERANCE)
+			break;
+
+		/*
+		 * The last line of defense is to check if the command batch has
+		 * timed out. If we get this far but the timeout hasn't expired
+		 * yet then the GPU is still ticking away
+		 */
+
+		if (time_is_after_jiffies(cmdbatch->expires))
+			break;
+
+		/* Boom goes the dynamite */
+
+		pr_fault(device, cmdbatch,
+			"gpu timeout ctx %d ts %d\n",
+			cmdbatch->context->id, cmdbatch->timestamp);
+
+		adreno_set_gpu_fault(adreno_dev, ADRENO_TIMEOUT_FAULT);
+
+		dispatcher_do_fault(device);
+		fault_handled = 1;
+		break;
+	}
+
+	/*
+	 * Call the dispatcher fault routine here so the fault bit gets cleared
+	 * when no commands are in dispatcher but fault bit is set. This can
+	 * happen on false hang detects
+	 */
+	if (!fault_handled && dispatcher_do_fault(device))
+		goto done;
+
+	/*
+	 * If inflight went to 0, queue back up the event processor to catch
+	 * stragglers
+	 */
+	if (dispatcher->inflight == 0 && count) {
+		mutex_lock(&device->mutex);
+		queue_work(device->work_queue, &device->ts_expired_ws);
+		mutex_unlock(&device->mutex);
+	}
+
+	/* Dispatch new commands if we have the room */
+	if (dispatcher->inflight < _dispatcher_inflight)
+		_adreno_dispatcher_issuecmds(adreno_dev);
+
+done:
+	/* Either update the timer for the next command batch or disable it */
+	if (dispatcher->inflight) {
+		struct kgsl_cmdbatch *cmdbatch
+			= dispatcher->cmdqueue[dispatcher->head];
+
+		/* Update the timeout timer for the next command batch */
+		mod_timer(&dispatcher->timer, cmdbatch->expires);
+
+		/* There are still things in flight - update the idle counts */
+		mutex_lock(&device->mutex);
+		kgsl_pwrscale_idle(device);
+		mutex_unlock(&device->mutex);
+	} else {
+		/* There is nothing left in the pipeline.  Shut 'er down boys */
+		mutex_lock(&device->mutex);
+		/*
+		 * Stop the fault timer before decrementing the active count to
+		 * avoid reading the hardware registers while we are trying to
+		 * turn clocks off
+		 */
+		del_timer_sync(&dispatcher->fault_timer);
+
+		if (test_bit(ADRENO_DISPATCHER_POWER, &dispatcher->priv)) {
+			kgsl_active_count_put(device);
+			clear_bit(ADRENO_DISPATCHER_POWER, &dispatcher->priv);
+		}
+
+		mutex_unlock(&device->mutex);
+	}
+
+	/* Before leaving update the pwrscale information */
+	mutex_lock(&device->mutex);
+	kgsl_pwrscale_idle(device);
+	mutex_unlock(&device->mutex);
+
+	mutex_unlock(&dispatcher->mutex);
+}
+
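+/**
+ * adreno_dispatcher_schedule() - Schedule the dispatcher work handler
+ * @device: Pointer to the KGSL device
+ *
+ * Queue the dispatcher master work handler on the device work queue
+ */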
+void adreno_dispatcher_schedule(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+
+	queue_work(device->work_queue, &dispatcher->work);
+}
+
+/**
+ * adreno_dispatcher_queue_context() - schedule a drawctxt in the dispatcher
+ * @device: pointer to the KGSL device
+ * @drawctxt: pointer to the drawctxt to schedule
+ *
+ * Put a draw context on the dispatcher pending queue and schedule the
+ * dispatcher. This is used to reschedule contexts that might have been blocked
+ * on sync points or other concerns
+ */
+void adreno_dispatcher_queue_context(struct kgsl_device *device,
+	struct adreno_context *drawctxt)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	dispatcher_queue_context(adreno_dev, drawctxt);
+	adreno_dispatcher_schedule(device);
+}
+
+/*
+ * This is called on a regular basis while command batches are inflight.  Fault
+ * detection registers are read and compared to the existing values - if they
+ * changed then the GPU is still running.  If they are the same between
+ * subsequent calls then the GPU may have faulted
+ */
+
+void adreno_dispatcher_fault_timer(unsigned long data)
+{
+	struct adreno_device *adreno_dev = (struct adreno_device *) data;
+	struct kgsl_device *device = &adreno_dev->dev;
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+
+	/* Leave if the user decided to turn off fast hang detection */
+	if (adreno_dev->fast_hang_detect == 0)
+		return;
+
+	if (adreno_gpu_fault(adreno_dev)) {
+		adreno_dispatcher_schedule(device);
+		return;
+	}
+
+	/*
+	 * Read the fault registers - if the comparison returns 0 then they
+	 * haven't changed, so mark the dispatcher as faulted and schedule the
+	 * work loop.
+	 */
+
+	if (!fault_detect_read_compare(device)) {
+		adreno_set_gpu_fault(adreno_dev, ADRENO_SOFT_FAULT);
+		adreno_dispatcher_schedule(device);
+	} else {
+		mod_timer(&dispatcher->fault_timer,
+			jiffies + msecs_to_jiffies(_fault_timer_interval));
+	}
+}
+
+/*
+ * This is called when the timer expires - it either means the GPU is hung or
+ * the IB is taking too long to execute
+ */
+void adreno_dispatcher_timer(unsigned long data)
+{
+	struct adreno_device *adreno_dev = (struct adreno_device *) data;
+	struct kgsl_device *device = &adreno_dev->dev;
+
+	adreno_dispatcher_schedule(device);
+}
+
+/**
+ * adreno_dispatcher_irq_fault() - Trigger a fault in the dispatcher
+ * @device: Pointer to the KGSL device
+ *
+ * Called from an interrupt context this will trigger a fault in the
+ * dispatcher for the oldest pending command batch
+ */
+void adreno_dispatcher_irq_fault(struct kgsl_device *device)
+{
+	adreno_set_gpu_fault(ADRENO_DEVICE(device), ADRENO_HARD_FAULT);
+	adreno_dispatcher_schedule(device);
+}
+
+/**
+ * adreno_dispatcher_start() - activate the dispatcher
+ * @device: pointer to the KGSL device structure
+ *
+ */
+void adreno_dispatcher_start(struct kgsl_device *device)
+{
+	complete_all(&device->cmdbatch_gate);
+
+	/* Schedule the work loop to get things going */
+	adreno_dispatcher_schedule(device);
+}
+
+/**
+ * adreno_dispatcher_stop() - stop the dispatcher
+ * @adreno_dev: pointer to the adreno device structure
+ *
+ * Stop the dispatcher and close all the timers
+ */
+void adreno_dispatcher_stop(struct adreno_device *adreno_dev)
+{
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+
+	del_timer_sync(&dispatcher->timer);
+	del_timer_sync(&dispatcher->fault_timer);
+}
+
+/**
+ * adreno_dispatcher_close() - close the dispatcher
+ * @adreno_dev: pointer to the adreno device structure
+ *
+ * Close the dispatcher and free all the outstanding commands and memory
+ */
+void adreno_dispatcher_close(struct adreno_device *adreno_dev)
+{
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+
+	mutex_lock(&dispatcher->mutex);
+	del_timer_sync(&dispatcher->timer);
+	del_timer_sync(&dispatcher->fault_timer);
+
+	while (dispatcher->head != dispatcher->tail) {
+		kgsl_cmdbatch_destroy(dispatcher->cmdqueue[dispatcher->head]);
+		dispatcher->head = (dispatcher->head + 1)
+			% ADRENO_DISPATCH_CMDQUEUE_SIZE;
+	}
+
+	mutex_unlock(&dispatcher->mutex);
+
+	kobject_put(&dispatcher->kobj);
+}
+
+struct dispatcher_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct adreno_dispatcher *,
+			struct dispatcher_attribute *, char *);
+	ssize_t (*store)(struct adreno_dispatcher *,
+			struct dispatcher_attribute *, const char *buf,
+			size_t count);
+	unsigned int max;
+	unsigned int *value;
+};
+
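+/*
+ * DISPATCHER_UINT_ATTR() - declare a sysfs attribute backed by an unsigned
+ * int tunable; a max of 0 means the value is unbounded
+ */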
+#define DISPATCHER_UINT_ATTR(_name, _mode, _max, _value) \
+	struct dispatcher_attribute dispatcher_attr_##_name =  { \
+		.attr = { .name = __stringify(_name), .mode = _mode }, \
+		.show = _show_uint, \
+		.store = _store_uint, \
+		.max = _max, \
+		.value = &(_value), \
+	}
+
+#define to_dispatcher_attr(_a) \
+	container_of((_a), struct dispatcher_attribute, attr)
+#define to_dispatcher(k) container_of(k, struct adreno_dispatcher, kobj)
+
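+/* sysfs store handler - reject zero and values above the attribute max */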
+static ssize_t _store_uint(struct adreno_dispatcher *dispatcher,
+		struct dispatcher_attribute *attr,
+		const char *buf, size_t size)
+{
+	unsigned long val;
+	int ret = kstrtoul(buf, 0, &val);
+
+	if (ret)
+		return ret;
+
+	if (!val || (attr->max && (val > attr->max)))
+		return -EINVAL;
+
+	*((unsigned int *) attr->value) = val;
+	return size;
+}
+
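+/* sysfs show handler - print the current value of the tunable */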
+static ssize_t _show_uint(struct adreno_dispatcher *dispatcher,
+		struct dispatcher_attribute *attr,
+		char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%u\n",
+		*((unsigned int *) attr->value));
+}
+
+static DISPATCHER_UINT_ATTR(inflight, 0644, ADRENO_DISPATCH_CMDQUEUE_SIZE,
+	_dispatcher_inflight);
+/*
+ * Our code that "puts back" a command from the context is much cleaner
+ * if we are sure that there will always be enough room in the
+ * ringbuffer so restrict the maximum size of the context queue to
+ * ADRENO_CONTEXT_CMDQUEUE_SIZE - 1
+ */
+static DISPATCHER_UINT_ATTR(context_cmdqueue_size, 0644,
+	ADRENO_CONTEXT_CMDQUEUE_SIZE - 1, _context_cmdqueue_size);
+static DISPATCHER_UINT_ATTR(context_burst_count, 0644, 0,
+	_context_cmdbatch_burst);
+static DISPATCHER_UINT_ATTR(cmdbatch_timeout, 0644, 0, _cmdbatch_timeout);
+static DISPATCHER_UINT_ATTR(context_queue_wait, 0644, 0, _context_queue_wait);
+static DISPATCHER_UINT_ATTR(fault_detect_interval, 0644, 0,
+	_fault_timer_interval);
+
+static struct attribute *dispatcher_attrs[] = {
+	&dispatcher_attr_inflight.attr,
+	&dispatcher_attr_context_cmdqueue_size.attr,
+	&dispatcher_attr_context_burst_count.attr,
+	&dispatcher_attr_cmdbatch_timeout.attr,
+	&dispatcher_attr_context_queue_wait.attr,
+	&dispatcher_attr_fault_detect_interval.attr,
+	NULL,
+};
+
+static ssize_t dispatcher_sysfs_show(struct kobject *kobj,
+				   struct attribute *attr, char *buf)
+{
+	struct adreno_dispatcher *dispatcher = to_dispatcher(kobj);
+	struct dispatcher_attribute *pattr = to_dispatcher_attr(attr);
+	ssize_t ret = -EIO;
+
+	if (pattr->show)
+		ret = pattr->show(dispatcher, pattr, buf);
+
+	return ret;
+}
+
+static ssize_t dispatcher_sysfs_store(struct kobject *kobj,
+				    struct attribute *attr,
+				    const char *buf, size_t count)
+{
+	struct adreno_dispatcher *dispatcher = to_dispatcher(kobj);
+	struct dispatcher_attribute *pattr = to_dispatcher_attr(attr);
+	ssize_t ret = -EIO;
+
+	if (pattr->store)
+		ret = pattr->store(dispatcher, pattr, buf, count);
+
+	return ret;
+}
+
+static void dispatcher_sysfs_release(struct kobject *kobj)
+{
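+	/* Nothing to release - the dispatcher is embedded in adreno_device */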
+}
+
+static const struct sysfs_ops dispatcher_sysfs_ops = {
+	.show = dispatcher_sysfs_show,
+	.store = dispatcher_sysfs_store
+};
+
+static struct kobj_type ktype_dispatcher = {
+	.sysfs_ops = &dispatcher_sysfs_ops,
+	.default_attrs = dispatcher_attrs,
+	.release = dispatcher_sysfs_release
+};
+
+/**
+ * adreno_dispatcher_init() - Initialize the dispatcher
+ * @adreno_dev: pointer to the adreno device structure
+ *
+ * Initialize the dispatcher
+ */
+int adreno_dispatcher_init(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = &adreno_dev->dev;
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+	int ret;
+
+	memset(dispatcher, 0, sizeof(*dispatcher));
+
+	mutex_init(&dispatcher->mutex);
+
+	setup_timer(&dispatcher->timer, adreno_dispatcher_timer,
+		(unsigned long) adreno_dev);
+
+	setup_timer(&dispatcher->fault_timer, adreno_dispatcher_fault_timer,
+		(unsigned long) adreno_dev);
+
+	INIT_WORK(&dispatcher->work, adreno_dispatcher_work);
+
+	plist_head_init(&dispatcher->pending);
+	spin_lock_init(&dispatcher->plist_lock);
+
+	ret = kobject_init_and_add(&dispatcher->kobj, &ktype_dispatcher,
+		&device->dev->kobj, "dispatch");
+
+	return ret;
+}
diff --git a/drivers/gpu/msm/adreno_drawctxt.c b/drivers/gpu/msm/adreno_drawctxt.c
index 0af4c12e..8ff07ac 100644
--- a/drivers/gpu/msm/adreno_drawctxt.c
+++ b/drivers/gpu/msm/adreno_drawctxt.c
@@ -13,10 +13,12 @@
 
 #include <linux/slab.h>
 #include <linux/msm_kgsl.h>
+#include <linux/sched.h>
 
 #include "kgsl.h"
 #include "kgsl_sharedmem.h"
 #include "adreno.h"
+#include "adreno_trace.h"
 
 #define KGSL_INIT_REFTIMESTAMP		0x7FFFFFFF
 
@@ -132,6 +134,262 @@
 	*incmd = cmd;
 }
 
+static void wait_callback(struct kgsl_device *device, void *priv, u32 id,
+		u32 timestamp, u32 type)
+{
+	struct adreno_context *drawctxt = priv;
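+	/* Wake up everyone sleeping on this context's waiting queue */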
+	wake_up_interruptible_all(&drawctxt->waiting);
+}
+
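+/*
+ * Helpers that choose between the io and non-io variants of
+ * wait_event_interruptible so that idle time gets charged to the right
+ * bucket for power accounting
+ */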
+#define adreno_wait_event_interruptible_timeout(wq, condition, timeout, io)   \
+({                                                                            \
+	long __ret = timeout;                                                 \
+	if (io)                                                               \
+		__wait_io_event_interruptible_timeout(wq, condition, __ret);  \
+	else                                                                  \
+		__wait_event_interruptible_timeout(wq, condition, __ret);     \
+	__ret;                                                                \
+})
+
+#define adreno_wait_event_interruptible(wq, condition, io)                    \
+({                                                                            \
+	long __ret;                                                           \
+	if (io)                                                               \
+		__wait_io_event_interruptible(wq, condition, __ret);          \
+	else                                                                  \
+		__wait_event_interruptible(wq, condition, __ret);             \
+	__ret;                                                                \
+})
+
+static int _check_context_timestamp(struct kgsl_device *device,
+		struct adreno_context *drawctxt, unsigned int timestamp)
+{
+	int ret = 0;
+
+	/* Bail if the drawctxt has been invalidated or destroyed */
+	if (kgsl_context_detached(&drawctxt->base) ||
+		drawctxt->state != ADRENO_CONTEXT_STATE_ACTIVE)
+		return 1;
+
+	mutex_lock(&device->mutex);
+	ret = kgsl_check_timestamp(device, &drawctxt->base, timestamp);
+	mutex_unlock(&device->mutex);
+
+	return ret;
+}
+
+/**
+ * adreno_drawctxt_wait() - sleep until a timestamp expires
+ * @adreno_dev: pointer to the adreno_device struct
+ * @context: Pointer to the KGSL context to wait on
+ * @timestamp: Timestamp to wait on
+ * @timeout: Number of milliseconds to wait (0 for infinite)
+ *
+ * Register an event to wait for a timestamp on a context and sleep until it
+ * has passed.  Returns < 0 on error, -ETIMEDOUT if the timeout expires or 0
+ * on success
+ */
+int adreno_drawctxt_wait(struct adreno_device *adreno_dev,
+		struct kgsl_context *context,
+		uint32_t timestamp, unsigned int timeout)
+{
+	static unsigned int io_cnt;
+	struct kgsl_device *device = &adreno_dev->dev;
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(context);
+	int ret, io;
+
+	if (kgsl_context_detached(context))
+		return -EINVAL;
+
+	if (drawctxt->state == ADRENO_CONTEXT_STATE_INVALID)
+		return -EDEADLK;
+
+	/* Needs to hold the device mutex */
+	BUG_ON(!mutex_is_locked(&device->mutex));
+
+	trace_adreno_drawctxt_wait_start(context->id, timestamp);
+
+	ret = kgsl_add_event(device, context->id, timestamp,
+		wait_callback, drawctxt, NULL);
+	if (ret)
+		goto done;
+
+	/*
+	 * For proper power accounting sometimes we need to call
+	 * io_wait_interruptible_timeout and sometimes we need to call
+	 * plain old wait_interruptible_timeout. We call the regular
+	 * timeout N times out of 100, where N is a number specified by
+	 * the current power level
+	 */
+
+	io_cnt = (io_cnt + 1) % 100;
+	io = (io_cnt < pwr->pwrlevels[pwr->active_pwrlevel].io_fraction)
+		? 0 : 1;
+
+	mutex_unlock(&device->mutex);
+
+	if (timeout) {
+		ret = (int) adreno_wait_event_interruptible_timeout(
+			drawctxt->waiting,
+			_check_context_timestamp(device, drawctxt, timestamp),
+			msecs_to_jiffies(timeout), io);
+
+		if (ret == 0)
+			ret = -ETIMEDOUT;
+		else if (ret > 0)
+			ret = 0;
+	} else {
+		ret = (int) adreno_wait_event_interruptible(drawctxt->waiting,
+			_check_context_timestamp(device, drawctxt, timestamp),
+				io);
+	}
+
+	mutex_lock(&device->mutex);
+
+	/* -EDEADLK if the context was invalidated while we were waiting */
+	if (drawctxt->state == ADRENO_CONTEXT_STATE_INVALID)
+		ret = -EDEADLK;
+
+	/* Return -EINVAL if the context was detached while we were waiting */
+	if (kgsl_context_detached(context))
+		ret = -EINVAL;
+
+done:
+	trace_adreno_drawctxt_wait_done(context->id, timestamp, ret);
+	return ret;
+}
+
+static void global_wait_callback(struct kgsl_device *device, void *priv, u32 id,
+		u32 timestamp, u32 type)
+{
+	struct adreno_context *drawctxt = priv;
+
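+	/* Wake all waiters and drop the ref taken when the event was added */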
+	wake_up_interruptible_all(&drawctxt->waiting);
+	kgsl_context_put(&drawctxt->base);
+}
+
+static int _check_global_timestamp(struct kgsl_device *device,
+		unsigned int timestamp)
+{
+	int ret;
+
+	mutex_lock(&device->mutex);
+	ret = kgsl_check_timestamp(device, NULL, timestamp);
+	mutex_unlock(&device->mutex);
+
+	return ret;
+}
+
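+/**
+ * adreno_drawctxt_wait_global() - wait for a global timestamp
+ * @adreno_dev: pointer to the adreno_device struct
+ * @context: the context whose submission is being waited for
+ * @timestamp: global timestamp to wait for
+ * @timeout: timeout in milliseconds (0 for infinite)
+ *
+ * Wait for a timestamp on the global ringbuffer; used during context detach
+ * to let the last submission from the context drain
+ */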
+int adreno_drawctxt_wait_global(struct adreno_device *adreno_dev,
+		struct kgsl_context *context,
+		uint32_t timestamp, unsigned int timeout)
+{
+	struct kgsl_device *device = &adreno_dev->dev;
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(context);
+	int ret;
+
+	/* Needs to hold the device mutex */
+	BUG_ON(!mutex_is_locked(&device->mutex));
+
+	if (!_kgsl_context_get(context)) {
+		ret = -EINVAL;
+		goto done;
+	}
+
+	trace_adreno_drawctxt_wait_start(KGSL_MEMSTORE_GLOBAL, timestamp);
+
+	ret = kgsl_add_event(device, KGSL_MEMSTORE_GLOBAL, timestamp,
+		global_wait_callback, drawctxt, NULL);
+	if (ret) {
+		kgsl_context_put(context);
+		goto done;
+	}
+
+	mutex_unlock(&device->mutex);
+
+	if (timeout) {
+		ret = (int) wait_event_interruptible_timeout(drawctxt->waiting,
+			_check_global_timestamp(device, timestamp),
+			msecs_to_jiffies(timeout));
+
+		if (ret == 0)
+			ret = -ETIMEDOUT;
+		else if (ret > 0)
+			ret = 0;
+	} else {
+		ret = (int) wait_event_interruptible(drawctxt->waiting,
+			_check_global_timestamp(device, timestamp));
+	}
+
+	mutex_lock(&device->mutex);
+
+	if (ret)
+		kgsl_cancel_events_timestamp(device, NULL, timestamp);
+
+done:
+	trace_adreno_drawctxt_wait_done(KGSL_MEMSTORE_GLOBAL, timestamp, ret);
+	return ret;
+}
+
+/**
+ * adreno_drawctxt_invalidate() - Invalidate an adreno draw context
+ * @device: Pointer to the KGSL device structure for the GPU
+ * @context: Pointer to the KGSL context structure
+ *
+ * Invalidate the context and remove all queued commands and cancel any pending
+ * waiters
+ */
+void adreno_drawctxt_invalidate(struct kgsl_device *device,
+		struct kgsl_context *context)
+{
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(context);
+
+	trace_adreno_drawctxt_invalidate(drawctxt);
+
+	drawctxt->state = ADRENO_CONTEXT_STATE_INVALID;
+
+	/* Clear the pending queue */
+	mutex_lock(&drawctxt->mutex);
+
+	/*
+	 * set the timestamp to the last value since the context is invalidated
+	 * and we want the pending events for this context to go away
+	 */
+	kgsl_sharedmem_writel(device, &device->memstore,
+			KGSL_MEMSTORE_OFFSET(context->id, soptimestamp),
+			drawctxt->timestamp);
+
+	kgsl_sharedmem_writel(device, &device->memstore,
+			KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp),
+			drawctxt->timestamp);
+
+	while (drawctxt->cmdqueue_head != drawctxt->cmdqueue_tail) {
+		struct kgsl_cmdbatch *cmdbatch =
+			drawctxt->cmdqueue[drawctxt->cmdqueue_head];
+
+		drawctxt->cmdqueue_head = (drawctxt->cmdqueue_head + 1) %
+			ADRENO_CONTEXT_CMDQUEUE_SIZE;
+
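+		/*
+		 * Drop the drawctxt mutex while cancelling events and
+		 * destroying the cmdbatch - both paths take the device mutex
+		 * and holding both mutexes invites deadlock
+		 */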
+		mutex_unlock(&drawctxt->mutex);
+
+		mutex_lock(&device->mutex);
+		kgsl_cancel_events_timestamp(device, context,
+			cmdbatch->timestamp);
+		mutex_unlock(&device->mutex);
+
+		kgsl_cmdbatch_destroy(cmdbatch);
+		mutex_lock(&drawctxt->mutex);
+	}
+
+	mutex_unlock(&drawctxt->mutex);
+
+	/* Give the bad news to everybody waiting around */
+	wake_up_interruptible_all(&drawctxt->waiting);
+	wake_up_interruptible_all(&drawctxt->wq);
+}
+
 /**
  * adreno_drawctxt_create - create a new adreno draw context
  * @dev_priv: the owner of the context
@@ -149,6 +407,7 @@
 	int ret;
 
 	drawctxt = kzalloc(sizeof(struct adreno_context), GFP_KERNEL);
+
 	if (drawctxt == NULL)
 		return ERR_PTR(-ENOMEM);
 
@@ -161,98 +420,147 @@
 	drawctxt->bin_base_offset = 0;
 	drawctxt->timestamp = 0;
 
-	*flags &= (KGSL_CONTEXT_PREAMBLE |
+	drawctxt->base.flags = *flags & (KGSL_CONTEXT_PREAMBLE |
 		KGSL_CONTEXT_NO_GMEM_ALLOC |
 		KGSL_CONTEXT_PER_CONTEXT_TS |
 		KGSL_CONTEXT_USER_GENERATED_TS |
 		KGSL_CONTEXT_NO_FAULT_TOLERANCE |
 		KGSL_CONTEXT_TYPE_MASK);
 
-	if (*flags & KGSL_CONTEXT_PREAMBLE)
-		drawctxt->flags |= CTXT_FLAGS_PREAMBLE;
+	/* Always enable per-context timestamps */
+	drawctxt->base.flags |= KGSL_CONTEXT_PER_CONTEXT_TS;
 
-	if (*flags & KGSL_CONTEXT_NO_GMEM_ALLOC)
-		drawctxt->flags |= CTXT_FLAGS_NOGMEMALLOC;
+	mutex_init(&drawctxt->mutex);
+	init_waitqueue_head(&drawctxt->wq);
+	init_waitqueue_head(&drawctxt->waiting);
 
-	if (*flags & KGSL_CONTEXT_PER_CONTEXT_TS)
-		drawctxt->flags |= CTXT_FLAGS_PER_CONTEXT_TS;
+	/*
+	 * Set up the plist node for the dispatcher.  For now all contexts have
+	 * the same priority, but later the priority will be set at create time
+	 * by the user
+	 */
 
-	if (*flags & KGSL_CONTEXT_USER_GENERATED_TS) {
-		if (!(*flags & KGSL_CONTEXT_PER_CONTEXT_TS)) {
-			ret = -EINVAL;
+	plist_node_init(&drawctxt->pending, ADRENO_CONTEXT_DEFAULT_PRIORITY);
+
+	if (adreno_dev->gpudev->ctxt_create) {
+		ret = adreno_dev->gpudev->ctxt_create(adreno_dev, drawctxt);
+		if (ret)
 			goto err;
-		}
-		drawctxt->flags |= CTXT_FLAGS_USER_GENERATED_TS;
-	}
-
-	if (*flags & KGSL_CONTEXT_NO_FAULT_TOLERANCE)
-		drawctxt->flags |= CTXT_FLAGS_NO_FAULT_TOLERANCE;
-
-	drawctxt->type =
-		(*flags & KGSL_CONTEXT_TYPE_MASK) >> KGSL_CONTEXT_TYPE_SHIFT;
-
-	ret = adreno_dev->gpudev->ctxt_create(adreno_dev, drawctxt);
-	if (ret)
+	} else if ((drawctxt->base.flags & KGSL_CONTEXT_PREAMBLE) == 0 ||
+		  (drawctxt->base.flags & KGSL_CONTEXT_NO_GMEM_ALLOC) == 0) {
+		KGSL_DEV_ERR_ONCE(device,
+				"legacy context switch not supported\n");
+		ret = -EINVAL;
 		goto err;
 
-	kgsl_sharedmem_writel(device, &device->memstore,
-			KGSL_MEMSTORE_OFFSET(drawctxt->base.id, ref_wait_ts),
-			KGSL_INIT_REFTIMESTAMP);
-	kgsl_sharedmem_writel(device, &device->memstore,
-			KGSL_MEMSTORE_OFFSET(drawctxt->base.id, ts_cmp_enable),
-			0);
+	} else {
+		drawctxt->ops = &adreno_preamble_ctx_ops;
+	}
+
 	kgsl_sharedmem_writel(device, &device->memstore,
 			KGSL_MEMSTORE_OFFSET(drawctxt->base.id, soptimestamp),
 			0);
 	kgsl_sharedmem_writel(device, &device->memstore,
 			KGSL_MEMSTORE_OFFSET(drawctxt->base.id, eoptimestamp),
 			0);
-
+	/* copy back whatever flags we decided were valid */
+	*flags = drawctxt->base.flags;
 	return &drawctxt->base;
 err:
-	kgsl_context_put(&drawctxt->base);
+	kgsl_context_detach(&drawctxt->base);
 	return ERR_PTR(ret);
 }
 
 /**
+ * adreno_drawctxt_sched() - Schedule a previously blocked context
+ * @device: pointer to a KGSL device
+ * @drawctxt: drawctxt to reschedule
+ *
+ * This function is called by the core when it knows that a previously blocked
+ * context has been unblocked.  The default adreno response is to reschedule the
+ * context on the dispatcher
+ */
+void adreno_drawctxt_sched(struct kgsl_device *device,
+		struct kgsl_context *context)
+{
+	adreno_dispatcher_queue_context(device, ADRENO_CONTEXT(context));
+}
+
+/**
  * adreno_drawctxt_detach(): detach a context from the GPU
  * @context: Generic KGSL context container for the context
  *
  */
-void adreno_drawctxt_detach(struct kgsl_context *context)
+int adreno_drawctxt_detach(struct kgsl_context *context)
 {
 	struct kgsl_device *device;
 	struct adreno_device *adreno_dev;
 	struct adreno_context *drawctxt;
+	int ret;
 
 	if (context == NULL)
-		return;
+		return 0;
 
 	device = context->device;
 	adreno_dev = ADRENO_DEVICE(device);
 	drawctxt = ADRENO_CONTEXT(context);
+
 	/* deactivate context */
-	if (adreno_dev->drawctxt_active == drawctxt) {
-		/* no need to save GMEM or shader, the context is
-		 * being destroyed.
-		 */
-		drawctxt->flags &= ~(CTXT_FLAGS_GMEM_SAVE |
-				     CTXT_FLAGS_SHADER_SAVE |
-				     CTXT_FLAGS_GMEM_SHADOW |
-				     CTXT_FLAGS_STATE_SHADOW);
-
-		drawctxt->flags |= CTXT_FLAGS_BEING_DESTROYED;
-
+	if (adreno_dev->drawctxt_active == drawctxt)
 		adreno_drawctxt_switch(adreno_dev, NULL, 0);
+
+	mutex_lock(&drawctxt->mutex);
+
+	while (drawctxt->cmdqueue_head != drawctxt->cmdqueue_tail) {
+		struct kgsl_cmdbatch *cmdbatch =
+			drawctxt->cmdqueue[drawctxt->cmdqueue_head];
+
+		drawctxt->cmdqueue_head = (drawctxt->cmdqueue_head + 1) %
+			ADRENO_CONTEXT_CMDQUEUE_SIZE;
+
+		mutex_unlock(&drawctxt->mutex);
+
+		/*
+		 * Don't hold the drawctxt mutex while the cmdbatch is being
+		 * destroyed because the cmdbatch destroy takes the device
+		 * mutex and the world falls in on itself
+		 */
+
+		kgsl_cmdbatch_destroy(cmdbatch);
+		mutex_lock(&drawctxt->mutex);
 	}
 
-	if (device->state != KGSL_STATE_HUNG)
-		adreno_idle(device);
+	mutex_unlock(&drawctxt->mutex);
+	/*
+	 * internal_timestamp is set in adreno_ringbuffer_addcmds,
+	 * which holds the device mutex. The entire context destroy
+	 * process requires the device mutex as well. But let's
+	 * make sure we notice if the locking changes.
+	 */
+	BUG_ON(!mutex_is_locked(&device->mutex));
+
+	/* Wait for the last global timestamp to pass before continuing */
+	ret = adreno_drawctxt_wait_global(adreno_dev, context,
+		drawctxt->internal_timestamp, 10 * 1000);
+
+	kgsl_sharedmem_writel(device, &device->memstore,
+			KGSL_MEMSTORE_OFFSET(context->id, soptimestamp),
+			drawctxt->timestamp);
+
+	kgsl_sharedmem_writel(device, &device->memstore,
+			KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp),
+			drawctxt->timestamp);
 
 	adreno_profile_process_results(device);
 
-	kgsl_sharedmem_free(&drawctxt->gpustate);
-	kgsl_sharedmem_free(&drawctxt->context_gmem_shadow.gmemshadow);
+	if (drawctxt->ops && drawctxt->ops->detach)
+		drawctxt->ops->detach(drawctxt);
+
+	/* wake threads waiting to submit commands from this context */
+	wake_up_interruptible_all(&drawctxt->waiting);
+	wake_up_interruptible_all(&drawctxt->wq);
+
+	return ret;
 }
 
 
@@ -266,6 +574,69 @@
 	kfree(drawctxt);
 }
 
+
+/**
+ * adreno_context_restore() - generic context restore handler
+ * @adreno_dev: the device
+ * @context: the context
+ *
+ * Basic context restore handler that writes the context identifier
+ * to the ringbuffer and issues pagetable switch commands if necessary.
+ * May be called directly from the adreno_context_ops.restore function
+ * pointer or as the first action in a hardware specific restore
+ * function.
+ */
+int adreno_context_restore(struct adreno_device *adreno_dev,
+				  struct adreno_context *context)
+{
+	int ret;
+	struct kgsl_device *device;
+	unsigned int cmds[5];
+
+	if (adreno_dev == NULL || context == NULL)
+		return -EINVAL;
+
+	device = &adreno_dev->dev;
+	/* write the context identifier to the ringbuffer */
+	cmds[0] = cp_nop_packet(1);
+	cmds[1] = KGSL_CONTEXT_TO_MEM_IDENTIFIER;
+	cmds[2] = cp_type3_packet(CP_MEM_WRITE, 2);
+	cmds[3] = device->memstore.gpuaddr +
+		KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context);
+	cmds[4] = context->base.id;
+	ret = adreno_ringbuffer_issuecmds(device, context, KGSL_CMD_FLAGS_NONE,
+					cmds, 5);
+	if (ret)
+		return ret;
+
+	return kgsl_mmu_setstate(&device->mmu,
+			context->base.proc_priv->pagetable,
+			context->base.id);
+}
+
+const struct adreno_context_ops adreno_preamble_ctx_ops = {
+	.restore = adreno_context_restore,
+};
+
+/**
+ * context_save() - save old context when necessary
+ * @adreno_dev: the device
+ * @context: the old context
+ *
+ * For legacy context switching, we need to issue save
+ * commands unless the context is being destroyed.
+ */
+static inline int context_save(struct adreno_device *adreno_dev,
+				struct adreno_context *context)
+{
+	if (context->ops->save == NULL
+		|| kgsl_context_detached(&context->base)
+		|| context->state == ADRENO_CONTEXT_STATE_INVALID)
+		return 0;
+
+	return context->ops->save(adreno_dev, context);
+}
+
 /**
  * adreno_drawctxt_set_bin_base_offset - set bin base offset for the context
  * @device - KGSL device that owns the context
@@ -296,50 +667,79 @@
  * Switch the current draw context
  */
 
-void adreno_drawctxt_switch(struct adreno_device *adreno_dev,
+int adreno_drawctxt_switch(struct adreno_device *adreno_dev,
 				struct adreno_context *drawctxt,
 				unsigned int flags)
 {
 	struct kgsl_device *device = &adreno_dev->dev;
+	int ret = 0;
 
 	if (drawctxt) {
+		/*
+		 * Handle the legacy gmem save/restore flag on each IB.
+		 * Userspace sets it to guard IB sequences that require
+		 * gmem to be saved and clears it at the end of the
+		 * sequence.
+		 */
 		if (flags & KGSL_CONTEXT_SAVE_GMEM)
 			/* Set the flag in context so that the save is done
 			* when this context is switched out. */
-			drawctxt->flags |= CTXT_FLAGS_GMEM_SAVE;
+			set_bit(ADRENO_CONTEXT_GMEM_SAVE, &drawctxt->priv);
 		else
 			/* Remove GMEM saving flag from the context */
-			drawctxt->flags &= ~CTXT_FLAGS_GMEM_SAVE;
+			clear_bit(ADRENO_CONTEXT_GMEM_SAVE, &drawctxt->priv);
 	}
 
 	/* already current? */
 	if (adreno_dev->drawctxt_active == drawctxt) {
-		if (adreno_dev->gpudev->ctxt_draw_workaround &&
-			adreno_is_a225(adreno_dev))
-				adreno_dev->gpudev->ctxt_draw_workaround(
-					adreno_dev, drawctxt);
-		return;
+		if (drawctxt && drawctxt->ops->draw_workaround)
+			ret = drawctxt->ops->draw_workaround(adreno_dev,
+							 drawctxt);
+		return ret;
 	}
 
-	KGSL_CTXT_INFO(device, "from %d to %d flags %d\n",
-		adreno_dev->drawctxt_active ?
-		adreno_dev->drawctxt_active->base.id : 0,
-		drawctxt ? drawctxt->base.id : 0, flags);
+	trace_adreno_drawctxt_switch(adreno_dev->drawctxt_active,
+		drawctxt, flags);
 
-	/* Save the old context */
-	adreno_dev->gpudev->ctxt_save(adreno_dev, adreno_dev->drawctxt_active);
-
-	/* Put the old instance of the active drawctxt */
 	if (adreno_dev->drawctxt_active) {
+		ret = context_save(adreno_dev, adreno_dev->drawctxt_active);
+		if (ret) {
+			KGSL_DRV_ERR(device,
+				"Error in GPU context %d save: %d\n",
+				adreno_dev->drawctxt_active->base.id, ret);
+			return ret;
+		}
+
+		/* Put the old instance of the active drawctxt */
 		kgsl_context_put(&adreno_dev->drawctxt_active->base);
 		adreno_dev->drawctxt_active = NULL;
 	}
 
 	/* Get a refcount to the new instance */
-	if (drawctxt)
-		_kgsl_context_get(&drawctxt->base);
+	if (drawctxt) {
+		if (!_kgsl_context_get(&drawctxt->base))
+			return -EINVAL;
 
-	/* Set the new context */
-	adreno_dev->gpudev->ctxt_restore(adreno_dev, drawctxt);
+		/* Set the new context */
+		ret = drawctxt->ops->restore(adreno_dev, drawctxt);
+		if (ret) {
+			KGSL_DRV_ERR(device,
+					"Error in GPU context %d restore: %d\n",
+					drawctxt->base.id, ret);
+			return ret;
+		}
+	} else {
+		/*
+		 * No context - set the default pagetable and thats it.
+		 * If there isn't a current context, the kgsl_mmu_setstate
+		 * will use the CPU path so we don't need to give
+		 * it a valid context id.
+		 */
+		ret = kgsl_mmu_setstate(&device->mmu,
+					 device->mmu.defaultpagetable,
+					 KGSL_CONTEXT_INVALID);
+	}
+
 	adreno_dev->drawctxt_active = drawctxt;
+	return 0;
 }
diff --git a/drivers/gpu/msm/adreno_drawctxt.h b/drivers/gpu/msm/adreno_drawctxt.h
index 3088099..7656cd5 100644
--- a/drivers/gpu/msm/adreno_drawctxt.h
+++ b/drivers/gpu/msm/adreno_drawctxt.h
@@ -16,44 +16,6 @@
 #include "adreno_pm4types.h"
 #include "a2xx_reg.h"
 
-/* Flags */
-
-#define CTXT_FLAGS_NOT_IN_USE		0x00000000
-#define CTXT_FLAGS_IN_USE		BIT(0)
-
-/* state shadow memory allocated */
-#define CTXT_FLAGS_STATE_SHADOW		BIT(1)
-
-/* gmem shadow memory allocated */
-#define CTXT_FLAGS_GMEM_SHADOW		BIT(2)
-/* gmem must be copied to shadow */
-#define CTXT_FLAGS_GMEM_SAVE		BIT(3)
-/* gmem can be restored from shadow */
-#define CTXT_FLAGS_GMEM_RESTORE		BIT(4)
-/* preamble packed in cmdbuffer for context switching */
-#define CTXT_FLAGS_PREAMBLE		BIT(5)
-/* shader must be copied to shadow */
-#define CTXT_FLAGS_SHADER_SAVE		BIT(6)
-/* shader can be restored from shadow */
-#define CTXT_FLAGS_SHADER_RESTORE	BIT(7)
-/* Context has caused a GPU hang */
-#define CTXT_FLAGS_GPU_HANG		BIT(8)
-/* Specifies there is no need to save GMEM */
-#define CTXT_FLAGS_NOGMEMALLOC          BIT(9)
-/* Trash state for context */
-#define CTXT_FLAGS_TRASHSTATE		BIT(10)
-/* per context timestamps enabled */
-#define CTXT_FLAGS_PER_CONTEXT_TS	BIT(11)
-/* Context has caused a GPU hang and fault tolerance successful */
-#define CTXT_FLAGS_GPU_HANG_FT	BIT(12)
-/* Context is being destroyed so dont save it */
-#define CTXT_FLAGS_BEING_DESTROYED	BIT(13)
-/* User mode generated timestamps enabled */
-#define CTXT_FLAGS_USER_GENERATED_TS    BIT(14)
-/* Context skip till EOF */
-#define CTXT_FLAGS_SKIP_EOF             BIT(15)
-/* Context no fault tolerance */
-#define CTXT_FLAGS_NO_FAULT_TOLERANCE  BIT(16)
 
 /* Symbolic table for the adreno draw context type */
 #define ADRENO_DRAWCTXT_TYPES \
@@ -69,6 +31,13 @@
 	const char *str;
 };
 
+#define ADRENO_CONTEXT_CMDQUEUE_SIZE 128
+
+#define ADRENO_CONTEXT_DEFAULT_PRIORITY 1
+
+#define ADRENO_CONTEXT_STATE_ACTIVE 0
+#define ADRENO_CONTEXT_STATE_INVALID 1
+
 struct kgsl_device;
 struct adreno_device;
 struct kgsl_device_private;
@@ -99,18 +68,83 @@
 	struct kgsl_memdesc quad_vertices_restore;
 };
 
+struct adreno_context;
+
+/**
+ * struct adreno_context_ops - context state management functions
+ * @save: optional hook for saving context state
+ * @restore: required hook for restoring state,
+ *		adreno_context_restore() may be used directly here.
+ * @draw_workaround: optional hook for a workaround after every IB
+ * @detach: optional hook for freeing state tracking memory.
+ */
+struct adreno_context_ops {
+	int (*save)(struct adreno_device *, struct adreno_context *);
+	int (*restore)(struct adreno_device *, struct adreno_context *);
+	int (*draw_workaround)(struct adreno_device *,
+				struct adreno_context *);
+	void (*detach)(struct adreno_context *);
+};
+
+int adreno_context_restore(struct adreno_device *, struct adreno_context *);
+
+/* generic context ops for preamble context switch */
+extern const struct adreno_context_ops adreno_preamble_ctx_ops;
+
+/**
+ * struct adreno_context - Adreno GPU draw context
+ * @id: Unique integer ID of the context
+ * @timestamp: Last issued context-specific timestamp
+ * @internal_timestamp: Global timestamp of the last issued command
+ *			NOTE: guarded by device->mutex, not drawctxt->mutex!
+ * @state: Current state of the context
+ * @priv: Internal flags
+ * @type: Context type (GL, CL, RS)
+ * @mutex: Mutex to protect the cmdqueue
+ * @pagetable: Pointer to the GPU pagetable for the context
+ * @gpustate: Pointer to the GPU scratch memory for context save/restore
+ * @reg_restore: Command buffer for restoring context registers
+ * @shader_save: Command buffer for saving shaders
+ * @shader_restore: Command buffer to restore shaders
+ * @context_gmem_shadow: GMEM shadow structure for save/restore
+ * @reg_save: A2XX command buffer to save context registers
+ * @shader_fixup: A2XX command buffer to "fix" shaders on restore
+ * @chicken_restore: A2XX command buffer to "fix" register restore
+ * @bin_base_offset: Saved value of the A2XX BIN_BASE_OFFSET register
+ * @regconstant_save: A3XX command buffer to save some registers
+ * @constant_retore: A3XX command buffer to restore some registers
+ * @hslqcontrol_restore: A3XX command buffer to restore HSLSQ registers
+ * @save_fixup: A3XX command buffer to "fix" register save
+ * @restore_fixup: A3XX command buffer to restore register save fixes
+ * @shader_load_commands: A3XX GPU memory descriptor for shader load IB
+ * @shader_save_commands: A3XX GPU memory descriptor for shader save IB
+ * @constantr_save_commands: A3XX GPU memory descriptor for constant save IB
+ * @constant_load_commands: A3XX GPU memory descriptor for constant load IB
+ * @cond_execs: A3XX GPU memory descriptor for conditional exec IB
+ * @hlsq_restore_commands: A3XX GPU memory descriptor for HLSQ restore IB
+ * @cmdqueue: Queue of command batches waiting to be dispatched for this context
+ * @cmdqueue_head: Head of the cmdqueue queue
+ * @cmdqueue_tail: Tail of the cmdqueue queue
+ * @pending: Priority list node for the dispatcher list of pending contexts
+ * @wq: Workqueue structure for contexts to sleep pending room in the queue
+ * @waiting: Workqueue structure for contexts waiting for a timestamp or event
+ * @queued: Number of commands queued in the cmdqueue
+ * @ops: Context switch functions for this context.
+ */
 struct adreno_context {
 	struct kgsl_context base;
 	unsigned int ib_gpu_time_used;
 	unsigned int timestamp;
-	uint32_t flags;
+	unsigned int internal_timestamp;
+	int state;
+	unsigned long priv;
 	unsigned int type;
+	struct mutex mutex;
 	struct kgsl_memdesc gpustate;
 	unsigned int reg_restore[3];
 	unsigned int shader_save[3];
 	unsigned int shader_restore[3];
 
-	/* Information of the GMEM shadow that is created in context create */
 	struct gmem_shadow_t context_gmem_shadow;
 
 	/* A2XX specific items */
@@ -131,23 +165,71 @@
 	struct kgsl_memdesc constant_load_commands[3];
 	struct kgsl_memdesc cond_execs[4];
 	struct kgsl_memdesc hlsqcontrol_restore_commands[1];
+
+	/* Dispatcher */
+	struct kgsl_cmdbatch *cmdqueue[ADRENO_CONTEXT_CMDQUEUE_SIZE];
+	unsigned int cmdqueue_head;
+	unsigned int cmdqueue_tail;
+
+	struct plist_node pending;
+	wait_queue_head_t wq;
+	wait_queue_head_t waiting;
+
+	int queued;
+
+	const struct adreno_context_ops *ops;
 };
 
+/**
+ * enum adreno_context_priv - Private flags for an adreno draw context
+ * @ADRENO_CONTEXT_FAULT - set if the context has faulted (and recovered)
+ * @ADRENO_CONTEXT_GMEM_SAVE - gmem must be copied to shadow
+ * @ADRENO_CONTEXT_GMEM_RESTORE - gmem can be restored from shadow
+ * @ADRENO_CONTEXT_SHADER_SAVE - shader must be copied to shadow
+ * @ADRENO_CONTEXT_SHADER_RESTORE - shader can be restored from shadow
+ * @ADRENO_CONTEXT_GPU_HANG - Context has caused a GPU hang
+ * @ADRENO_CONTEXT_GPU_HANG_FT - Context has caused a GPU hang
+ *      and fault tolerance was successful
+ * @ADRENO_CONTEXT_SKIP_EOF - Context skips IBs until the next end of frame
+ *      marker.
+ * @ADRENO_CONTEXT_FORCE_PREAMBLE - Force the preamble for the next submission.
+ */
+enum adreno_context_priv {
+	ADRENO_CONTEXT_FAULT = 0,
+	ADRENO_CONTEXT_GMEM_SAVE,
+	ADRENO_CONTEXT_GMEM_RESTORE,
+	ADRENO_CONTEXT_SHADER_SAVE,
+	ADRENO_CONTEXT_SHADER_RESTORE,
+	ADRENO_CONTEXT_GPU_HANG,
+	ADRENO_CONTEXT_GPU_HANG_FT,
+	ADRENO_CONTEXT_SKIP_EOF,
+	ADRENO_CONTEXT_FORCE_PREAMBLE,
+};
 
 struct kgsl_context *adreno_drawctxt_create(struct kgsl_device_private *,
 			uint32_t *flags);
 
-void adreno_drawctxt_detach(struct kgsl_context *context);
+int adreno_drawctxt_detach(struct kgsl_context *context);
 
 void adreno_drawctxt_destroy(struct kgsl_context *context);
 
-void adreno_drawctxt_switch(struct adreno_device *adreno_dev,
+void adreno_drawctxt_sched(struct kgsl_device *device,
+		struct kgsl_context *context);
+
+int adreno_drawctxt_switch(struct adreno_device *adreno_dev,
 				struct adreno_context *drawctxt,
 				unsigned int flags);
 void adreno_drawctxt_set_bin_base_offset(struct kgsl_device *device,
 					struct kgsl_context *context,
 					unsigned int offset);
 
+int adreno_drawctxt_wait(struct adreno_device *adreno_dev,
+		struct kgsl_context *context,
+		uint32_t timestamp, unsigned int timeout);
+
+void adreno_drawctxt_invalidate(struct kgsl_device *device,
+		struct kgsl_context *context);
+
 /* GPU context switch helper functions */
 
 void build_quad_vtxbuff(struct adreno_context *drawctxt,
diff --git a/drivers/gpu/msm/adreno_pm4types.h b/drivers/gpu/msm/adreno_pm4types.h
index d1e2b43..37c3c50 100644
--- a/drivers/gpu/msm/adreno_pm4types.h
+++ b/drivers/gpu/msm/adreno_pm4types.h
@@ -164,6 +164,8 @@
 
 #define CP_SET_PROTECTED_MODE  0x5f /* sets the register protection mode */
 
+#define CP_BOOTSTRAP_UCODE  0x6f /* bootstraps microcode */
+
 /*
  * for a3xx
  */
diff --git a/drivers/gpu/msm/adreno_postmortem.c b/drivers/gpu/msm/adreno_postmortem.c
index 32dbd51..0397cca 100644
--- a/drivers/gpu/msm/adreno_postmortem.c
+++ b/drivers/gpu/msm/adreno_postmortem.c
@@ -22,6 +22,7 @@
 #include "adreno_ringbuffer.h"
 #include "kgsl_cffdump.h"
 #include "kgsl_pwrctrl.h"
+#include "adreno_trace.h"
 
 #include "a2xx_reg.h"
 #include "a3xx_reg.h"
@@ -395,8 +396,8 @@
 
 int adreno_dump(struct kgsl_device *device, int manual)
 {
-	unsigned int cp_ib1_base, cp_ib1_bufsz;
-	unsigned int cp_ib2_base, cp_ib2_bufsz;
+	unsigned int cp_ib1_base;
+	unsigned int cp_ib2_base;
 	phys_addr_t pt_base, cur_pt_base;
 	unsigned int cp_rb_base, cp_rb_ctrl, rb_count;
 	unsigned int cp_rb_wptr, cp_rb_rptr;
@@ -409,7 +410,6 @@
 	unsigned int ts_processed = 0xdeaddead;
 	struct kgsl_context *context;
 	unsigned int context_id;
-	unsigned int rbbm_status;
 
 	static struct ib_list ib_list;
 
@@ -419,16 +419,10 @@
 
 	mb();
 
-	if (device->pm_dump_enable) {
-		msm_clk_dump_debug_info();
+	msm_clk_dump_debug_info();
 
-		if (adreno_dev->gpudev->postmortem_dump)
-			adreno_dev->gpudev->postmortem_dump(adreno_dev);
-	}
-
-	kgsl_regread(device,
-			adreno_getreg(adreno_dev, ADRENO_REG_RBBM_STATUS),
-			&rbbm_status);
+	if (adreno_dev->gpudev->postmortem_dump)
+		adreno_dev->gpudev->postmortem_dump(adreno_dev);
 
 	pt_base = kgsl_mmu_get_current_ptbase(&device->mmu);
 	cur_pt_base = pt_base;
@@ -450,26 +444,8 @@
 		adreno_getreg(adreno_dev, ADRENO_REG_CP_IB1_BASE),
 		&cp_ib1_base);
 	kgsl_regread(device,
-		adreno_getreg(adreno_dev, ADRENO_REG_CP_IB1_BUFSZ),
-		&cp_ib1_bufsz);
-	kgsl_regread(device,
 		adreno_getreg(adreno_dev, ADRENO_REG_CP_IB2_BASE),
 		&cp_ib2_base);
-	kgsl_regread(device,
-		adreno_getreg(adreno_dev, ADRENO_REG_CP_IB2_BUFSZ),
-		&cp_ib2_bufsz);
-
-	/* If postmortem dump is not enabled, dump minimal set and return */
-	if (!device->pm_dump_enable) {
-
-		KGSL_LOG_DUMP(device,
-			"STATUS %08X | IB1:%08X/%08X | IB2: %08X/%08X"
-			" | RPTR: %04X | WPTR: %04X\n",
-			rbbm_status,  cp_ib1_base, cp_ib1_bufsz, cp_ib2_base,
-			cp_ib2_bufsz, cp_rb_rptr, cp_rb_wptr);
-
-		return 0;
-	}
 
 	kgsl_sharedmem_readl(&device->memstore,
 			(unsigned int *) &context_id,
@@ -644,5 +620,9 @@
 error_vfree:
 	vfree(rb_copy);
 end:
+	/* Restart the dispatcher after a manually triggered dump */
+	if (manual)
+		adreno_dispatcher_start(device);
+
 	return result;
 }
diff --git a/drivers/gpu/msm/adreno_profile.c b/drivers/gpu/msm/adreno_profile.c
index 7d9d63f..8d3efd6 100644
--- a/drivers/gpu/msm/adreno_profile.c
+++ b/drivers/gpu/msm/adreno_profile.c
@@ -666,7 +666,7 @@
 	size_t size = 0;
 	char *buf, *pbuf;
 	bool remove_assignment = false;
-	int groupid, countable;
+	int groupid, countable, ret;
 
 	if (len >= PAGE_SIZE || len == 0)
 		return -EINVAL;
@@ -681,7 +681,9 @@
 		goto error_unlock;
 	}
 
-	kgsl_active_count_get(device);
+	ret = kgsl_active_count_get(device);
+	if (ret)
+		return -EINVAL;
 
 	/*
 	 * When adding/removing assignments, ensure that the GPU is done with
diff --git a/drivers/gpu/msm/adreno_ringbuffer.c b/drivers/gpu/msm/adreno_ringbuffer.c
index 8aec755..a43bd54 100644
--- a/drivers/gpu/msm/adreno_ringbuffer.c
+++ b/drivers/gpu/msm/adreno_ringbuffer.c
@@ -30,14 +30,6 @@
 
 #define GSL_RB_NOP_SIZEDWORDS				2
 
-/*
- * CP DEBUG settings for all cores:
- * DYNAMIC_CLK_DISABLE [27] - turn off the dynamic clock control
- * PROG_END_PTR_ENABLE [25] - Allow 128 bit writes to the VBIF
- */
-
-#define CP_DEBUG_DEFAULT ((1 << 27) | (1 << 25))
-
 void adreno_ringbuffer_submit(struct adreno_ringbuffer *rb)
 {
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device);
@@ -67,11 +59,8 @@
 	unsigned long wait_time;
 	unsigned long wait_timeout = msecs_to_jiffies(ADRENO_IDLE_TIMEOUT);
 	unsigned long wait_time_part;
-	unsigned int prev_reg_val[FT_DETECT_REGS_COUNT];
 	unsigned int rptr;
 
-	memset(prev_reg_val, 0, sizeof(prev_reg_val));
-
 	/* if wptr ahead, fill the remaining with NOPs */
 	if (wptr_ahead) {
 		/* -1 for header */
@@ -105,43 +94,13 @@
 		if (freecmds == 0 || freecmds > numcmds)
 			break;
 
-		/* Dont wait for timeout, detect hang faster.
-		 */
-		if (time_after(jiffies, wait_time_part)) {
-			wait_time_part = jiffies +
-				msecs_to_jiffies(KGSL_TIMEOUT_PART);
-			if ((adreno_ft_detect(rb->device,
-						prev_reg_val))){
-				KGSL_DRV_ERR(rb->device,
-				"Hang detected while waiting for freespace in"
-				"ringbuffer rptr: 0x%x, wptr: 0x%x\n",
-				rptr, rb->wptr);
-				goto err;
-			}
-		}
-
 		if (time_after(jiffies, wait_time)) {
 			KGSL_DRV_ERR(rb->device,
 			"Timed out while waiting for freespace in ringbuffer "
 			"rptr: 0x%x, wptr: 0x%x\n", rptr, rb->wptr);
-			goto err;
+			return -ETIMEDOUT;
 		}
 
-		continue;
-
-err:
-		if (!adreno_dump_and_exec_ft(rb->device)) {
-			if (context && context->flags & CTXT_FLAGS_GPU_HANG) {
-				KGSL_CTXT_WARN(rb->device,
-				"Context %p caused a gpu hang. Will not accept commands for context %d\n",
-				context, context->base.id);
-				return -EDEADLK;
-			}
-			wait_time = jiffies + wait_timeout;
-		} else {
-			/* GPU is hung and fault tolerance failed */
-			BUG();
-		}
 	}
 	return 0;
 }
@@ -180,7 +139,8 @@
 	if (!ret) {
 		ptr = (unsigned int *)rb->buffer_desc.hostptr + rb->wptr;
 		rb->wptr += numcmds;
-	}
+	} else {
+		ptr = ERR_PTR(ret);
+	}
 
 	return ptr;
 }
@@ -247,28 +207,19 @@
  * adreno_ringbuffer_load_pm4_ucode() - Load pm4 ucode
  * @device: Pointer to a KGSL device
  * @start: Starting index in pm4 ucode to load
+ * @end: Ending index of pm4 ucode to load
  * @addr: Address to load the pm4 ucode
  *
  * Load the pm4 ucode from @start at @addr.
  */
-int adreno_ringbuffer_load_pm4_ucode(struct kgsl_device *device,
-					unsigned int start, unsigned int addr)
+inline int adreno_ringbuffer_load_pm4_ucode(struct kgsl_device *device,
+			unsigned int start, unsigned int end, unsigned int addr)
 {
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
 	int i;
 
-	if (adreno_dev->pm4_fw == NULL) {
-		int ret = adreno_ringbuffer_read_pm4_ucode(device);
-		if (ret)
-			return ret;
-	}
-
-	KGSL_DRV_INFO(device, "loading pm4 ucode version: %d\n",
-		adreno_dev->pm4_fw_version);
-
-	adreno_writereg(adreno_dev, ADRENO_REG_CP_DEBUG, CP_DEBUG_DEFAULT);
 	adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_RAM_WADDR, addr);
-	for (i = start; i < adreno_dev->pm4_fw_size; i++)
+	for (i = start; i < end; i++)
 		adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_RAM_DATA,
 					adreno_dev->pm4_fw[i]);
 
@@ -310,28 +261,19 @@
  * adreno_ringbuffer_load_pfp_ucode() - Load pfp ucode
  * @device: Pointer to a KGSL device
  * @start: Starting index in pfp ucode to load
+ * @end: Ending index of pfp ucode to load
  * @addr: Address to load the pfp ucode
  *
  * Load the pfp ucode from @start at @addr.
  */
-int adreno_ringbuffer_load_pfp_ucode(struct kgsl_device *device,
-					unsigned int start, unsigned int addr)
+inline int adreno_ringbuffer_load_pfp_ucode(struct kgsl_device *device,
+			unsigned int start, unsigned int end, unsigned int addr)
 {
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
 	int i;
 
-	if (adreno_dev->pfp_fw == NULL) {
-		int ret = adreno_ringbuffer_read_pfp_ucode(device);
-		if (ret)
-			return ret;
-	}
-
-	KGSL_DRV_INFO(device, "loading pfp ucode version: %d\n",
-			adreno_dev->pfp_fw_version);
-
-	adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_ADDR,
-						addr);
-	for (i = start; i < adreno_dev->pfp_fw_size; i++)
+	adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_ADDR, addr);
+	for (i = start; i < end; i++)
 		adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_DATA,
 						adreno_dev->pfp_fw[i]);
 
@@ -339,23 +281,98 @@
 }
 
 /**
- * _ringbuffer_start_common() - Ringbuffer start
+ * _ringbuffer_bootstrap_ucode() - Bootstrap GPU Ucode
+ * @rb: Pointer to adreno ringbuffer
+ * @load_jt: If non-zero only load the jump tables
+ *
+ * Bootstrap ucode for GPU
+ * load_jt == 0, bootstrap full microcode
+ * load_jt == 1, bootstrap jump tables of microcode
+ *
+ * A bootstrap packet is laid out as follows:
+ * Setup a type3 bootstrap packet
+ * PFP size to bootstrap
+ * PFP addr to write the PFP data
+ * PM4 size to bootstrap
+ * PM4 addr to write the PM4 data
+ * PFP dwords from microcode to bootstrap
+ * PM4 dwords from microcode to bootstrap
+ */
+static int _ringbuffer_bootstrap_ucode(struct adreno_ringbuffer *rb,
+					unsigned int load_jt)
+{
+	unsigned int *cmds, cmds_gpu, bootstrap_size;
+	int i = 0;
+	struct kgsl_device *device = rb->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	unsigned int pm4_size, pm4_idx, pm4_addr, pfp_size, pfp_idx, pfp_addr;
+
+	/* Only bootstrap jump tables of ucode */
+	if (load_jt) {
+		pm4_idx = adreno_dev->pm4_jt_idx;
+		pm4_addr = adreno_dev->pm4_jt_addr;
+		pfp_idx = adreno_dev->pfp_jt_idx;
+		pfp_addr = adreno_dev->pfp_jt_addr;
+	} else {
+		/* Bootstrap full ucode */
+		pm4_idx = 1;
+		pm4_addr = 0;
+		pfp_idx = 1;
+		pfp_addr = 0;
+	}
+
+	pm4_size = (adreno_dev->pm4_fw_size - pm4_idx);
+	pfp_size = (adreno_dev->pfp_fw_size - pfp_idx);
+
+	/*
+	 * The writes below tell the PFP that 0x6f is the
+	 * opcode for bootstrapping
+	 */
+	adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_ADDR, 0x200);
+	adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_DATA, 0x6f0005);
+
+	/* clear ME_HALT to start micro engine */
+	adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_CNTL, 0);
+
+	bootstrap_size = (pm4_size + pfp_size + 5);
+
+	cmds = adreno_ringbuffer_allocspace(rb, NULL, bootstrap_size);
+	if (IS_ERR_OR_NULL(cmds))
+		return cmds ? PTR_ERR(cmds) : -ENOMEM;
+
+	cmds_gpu = rb->buffer_desc.gpuaddr +
+			sizeof(uint) * (rb->wptr - bootstrap_size);
+	/* Construct the packet that bootsraps the ucode */
+	GSL_RB_WRITE(rb->device, cmds, cmds_gpu,
+			cp_type3_packet(CP_BOOTSTRAP_UCODE,
+			(bootstrap_size - 1)));
+	GSL_RB_WRITE(rb->device, cmds, cmds_gpu, pfp_size);
+	GSL_RB_WRITE(rb->device, cmds, cmds_gpu, pfp_addr);
+	GSL_RB_WRITE(rb->device, cmds, cmds_gpu, pm4_size);
+	GSL_RB_WRITE(rb->device, cmds, cmds_gpu, pm4_addr);
+	for (i = pfp_idx; i < adreno_dev->pfp_fw_size; i++)
+		GSL_RB_WRITE(rb->device, cmds, cmds_gpu, adreno_dev->pfp_fw[i]);
+	for (i = pm4_idx; i < adreno_dev->pm4_fw_size; i++)
+		GSL_RB_WRITE(rb->device, cmds, cmds_gpu, adreno_dev->pm4_fw[i]);
+
+	adreno_ringbuffer_submit(rb);
+	/* idle device to validate bootstrap */
+	return adreno_idle(device);
+}
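
As a concrete illustration of the layout described in the kernel-doc above, here is a minimal sketch of the dwords this function emits, assuming hypothetical two-dword PFP and PM4 payloads written to addresses 0x200 and 0x0; pfp_fw/pm4_fw stand in for adreno_dev->pfp_fw/pm4_fw, and all values are illustrative only:

	/* Sketch: bootstrap packet for pfp_size = 2, pm4_size = 2 */
	unsigned int pkt[] = {
		cp_type3_packet(CP_BOOTSTRAP_UCODE, 8), /* 8 payload dwords */
		2,			/* PFP size to bootstrap */
		0x200,			/* PFP addr to write the PFP data */
		2,			/* PM4 size to bootstrap */
		0x0,			/* PM4 addr to write the PM4 data */
		pfp_fw[1], pfp_fw[2],	/* PFP dwords from the microcode */
		pm4_fw[1], pm4_fw[2],	/* PM4 dwords from the microcode */
	};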
+
+/**
+ * _ringbuffer_setup_common() - Ringbuffer start
  * @rb: Pointer to adreno ringbuffer
  *
  * Setup ringbuffer for GPU.
  */
-int _ringbuffer_start_common(struct adreno_ringbuffer *rb)
+void _ringbuffer_setup_common(struct adreno_ringbuffer *rb)
 {
-	int status;
-	/*cp_rb_cntl_u cp_rb_cntl; */
 	union reg_cp_rb_cntl cp_rb_cntl;
 	unsigned int rb_cntl;
 	struct kgsl_device *device = rb->device;
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
 
-	if (rb->flags & KGSL_FLAGS_STARTED)
-		return 0;
-
 	kgsl_sharedmem_set(rb->device, &rb->memptrs_desc, 0, 0,
 			   sizeof(struct kgsl_rbmemptrs));
 
@@ -428,6 +445,19 @@
 		kgsl_regwrite(device, REG_CP_QUEUE_THRESHOLDS, 0x003E2008);
 
 	rb->wptr = 0;
+}
+
+/**
+ * _ringbuffer_start_common() - Ringbuffer start
+ * @rb: Pointer to adreno ringbuffer
+ *
+ * Start ringbuffer for GPU.
+ */
+int _ringbuffer_start_common(struct adreno_ringbuffer *rb)
+{
+	int status;
+	struct kgsl_device *device = rb->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
 
 	/* clear ME_HALT to start micro engine */
 	adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_CNTL, 0);
@@ -459,39 +489,99 @@
 	struct kgsl_device *device = rb->device;
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
 
-	/* load the CP ucode */
-	status = adreno_ringbuffer_load_pm4_ucode(device,
-			adreno_dev->pm4_jt_idx, adreno_dev->pm4_jt_addr);
-	if (status != 0)
-		return status;
+	if (rb->flags & KGSL_FLAGS_STARTED)
+		return 0;
 
-	/* load the prefetch parser ucode */
-	status = adreno_ringbuffer_load_pfp_ucode(device,
-			adreno_dev->pfp_jt_idx, adreno_dev->pfp_jt_addr);
-	if (status != 0)
-		return status;
+	_ringbuffer_setup_common(rb);
 
-	return _ringbuffer_start_common(rb);
+	/* If bootstrapping is supported, use it to load the jump tables */
+	if (adreno_bootstrap_ucode(adreno_dev)) {
+		status = _ringbuffer_bootstrap_ucode(rb, 1);
+		if (status != 0)
+			return status;
+	} else {
+		/* load the CP jump tables using AHB writes */
+		status = adreno_ringbuffer_load_pm4_ucode(device,
+			adreno_dev->pm4_jt_idx, adreno_dev->pm4_fw_size,
+			adreno_dev->pm4_jt_addr);
+		if (status != 0)
+			return status;
+
+		/* load the prefetch parser jump tables using AHB writes */
+		status = adreno_ringbuffer_load_pfp_ucode(device,
+			adreno_dev->pfp_jt_idx, adreno_dev->pfp_fw_size,
+			adreno_dev->pfp_jt_addr);
+		if (status != 0)
+			return status;
+	}
+
+	status = _ringbuffer_start_common(rb);
+
+	return status;
 }
 
-int adreno_ringbuffer_start(struct adreno_ringbuffer *rb)
+/**
+ * adreno_ringbuffer_cold_start() - Ringbuffer cold start
+ * @rb: Pointer to adreno ringbuffer
+ *
+ * Start the ringbuffer from power collapse.
+ */
+int adreno_ringbuffer_cold_start(struct adreno_ringbuffer *rb)
 {
 	int status;
+	struct kgsl_device *device = rb->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
 
 	if (rb->flags & KGSL_FLAGS_STARTED)
 		return 0;
 
-	/* load the CP ucode */
-	status = adreno_ringbuffer_load_pm4_ucode(rb->device, 1, 0);
-	if (status != 0)
-		return status;
+	_ringbuffer_setup_common(rb);
 
-	/* load the prefetch parser ucode */
-	status = adreno_ringbuffer_load_pfp_ucode(rb->device, 1, 0);
-	if (status != 0)
-		return status;
+	/* If bootstrapping is supported, use it to load the ucode */
+	if (adreno_bootstrap_ucode(adreno_dev)) {
 
-	return _ringbuffer_start_common(rb);
+		/*
+		 * Load the first adreno_dev->pm4_bstrp_size +
+		 * adreno_dev->pfp_bstrp_size microcode dwords using AHB
+		 * writes. This small piece of microcode contains the
+		 * dispatcher and the booter; it enables the CP to understand
+		 * the CP_BOOTSTRAP_UCODE packet issued by
+		 * _ringbuffer_bootstrap_ucode(), which loads the rest of
+		 * the microcode.
+		 */
+
+		status = adreno_ringbuffer_load_pm4_ucode(rb->device, 1,
+					adreno_dev->pm4_bstrp_size + 1, 0);
+		if (status != 0)
+			return status;
+
+		status = adreno_ringbuffer_load_pfp_ucode(rb->device, 1,
+					adreno_dev->pfp_bstrp_size + 1, 0);
+		if (status != 0)
+			return status;
+
+		/* Bootstrap rest of the ucode here */
+		status = _ringbuffer_bootstrap_ucode(rb, 0);
+		if (status != 0)
+			return status;
+
+	} else {
+		/* load the CP ucode using AHB writes */
+		status = adreno_ringbuffer_load_pm4_ucode(rb->device, 1,
+					adreno_dev->pm4_fw_size, 0);
+		if (status != 0)
+			return status;
+
+		/* load the prefetch parser ucode using AHB writes */
+		status = adreno_ringbuffer_load_pfp_ucode(rb->device, 1,
+					adreno_dev->pfp_fw_size, 0);
+		if (status != 0)
+			return status;
+	}
+
+	status = _ringbuffer_start_common(rb);
+
+	return status;
 }
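
A rough sketch of how a caller might pick between the two start paths, assuming it can tell whether the CP microcode survived the last power transition; the wrapper function and its flag are hypothetical, not part of this patch:

	static int example_start_rb(struct adreno_ringbuffer *rb,
					bool ucode_retained)
	{
		/* Microcode still resident: reload only the jump tables */
		if (ucode_retained)
			return adreno_ringbuffer_warm_start(rb);

		/* Otherwise reload the full PM4 and PFP microcode */
		return adreno_ringbuffer_cold_start(rb);
	}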
 
 void adreno_ringbuffer_stop(struct adreno_ringbuffer *rb)
@@ -568,20 +658,41 @@
 
 static int
 adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb,
-				struct adreno_context *context,
+				struct adreno_context *drawctxt,
 				unsigned int flags, unsigned int *cmds,
-				int sizedwords)
+				int sizedwords, uint32_t timestamp)
 {
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device);
 	unsigned int *ringcmds;
 	unsigned int total_sizedwords = sizedwords;
 	unsigned int i;
 	unsigned int rcmd_gpu;
-	unsigned int context_id = KGSL_MEMSTORE_GLOBAL;
+	unsigned int context_id;
 	unsigned int gpuaddr = rb->device->memstore.gpuaddr;
-	unsigned int timestamp;
 	bool profile_ready;
 
+	if (drawctxt != NULL && kgsl_context_detached(&drawctxt->base))
+		return -EINVAL;
+
+	rb->global_ts++;
+
+	/* If this is an internal IB, use the global timestamp for it */
+	if (!drawctxt || (flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE)) {
+		timestamp = rb->global_ts;
+		context_id = KGSL_MEMSTORE_GLOBAL;
+	} else {
+		context_id = drawctxt->base.id;
+	}
+
+	/*
+	 * Note that we cannot safely take drawctxt->mutex here without
+	 * potential mutex inversion with device->mutex which is held
+	 * here. As a result, any other code that accesses this variable
+	 * must also use device->mutex.
+	 */
+	if (drawctxt)
+		drawctxt->internal_timestamp = rb->global_ts;
+
 	/*
 	 * If in stream ib profiling is enabled and there are counters
 	 * assigned, then space needs to be reserved for profiling.  This
@@ -590,20 +701,10 @@
 	 * the _addcmds call since it is allocating additional ringbuffer
 	 * command space.
 	 */
-	profile_ready = !adreno_is_a2xx(adreno_dev) && context &&
+	profile_ready = !adreno_is_a2xx(adreno_dev) && drawctxt &&
 		adreno_profile_assignments_ready(&adreno_dev->profile) &&
 		!(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE);
 
-	/*
-	 * if the context was not created with per context timestamp
-	 * support, we must use the global timestamp since issueibcmds
-	 * will be returning that one, or if an internal issue then
-	 * use global timestamp.
-	 */
-	if ((context && (context->flags & CTXT_FLAGS_PER_CONTEXT_TS)) &&
-		!(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE))
-		context_id = context->base.id;
-
 	/* reserve space to temporarily turn off protected mode
 	*  error checking if needed
 	*/
@@ -613,13 +714,8 @@
 	/* internal ib command identifier for the ringbuffer */
 	total_sizedwords += (flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE) ? 2 : 0;
 
-	/* Add CP_COND_EXEC commands to generate CP_INTERRUPT */
-	total_sizedwords += context ? 13 : 0;
-
-	if ((context) && (context->flags & CTXT_FLAGS_PER_CONTEXT_TS) &&
-		(flags & (KGSL_CMD_FLAGS_INTERNAL_ISSUE |
-		KGSL_CMD_FLAGS_GET_INT)))
-			total_sizedwords += 2;
+	/* Add two dwords for the CP_INTERRUPT */
+	total_sizedwords += drawctxt ? 2 : 0;
 
 	if (adreno_is_a3xx(adreno_dev))
 		total_sizedwords += 7;
@@ -627,19 +723,22 @@
 	if (adreno_is_a2xx(adreno_dev))
 		total_sizedwords += 2; /* CP_WAIT_FOR_IDLE */
 
-	total_sizedwords += 2; /* scratchpad ts for fault tolerance */
 	total_sizedwords += 3; /* sop timestamp */
 	total_sizedwords += 4; /* eop timestamp */
 
-	if (KGSL_MEMSTORE_GLOBAL != context_id)
-		total_sizedwords += 3; /* global timestamp without cache
-					* flush for non-zero context */
-
 	if (adreno_is_a20x(adreno_dev))
 		total_sizedwords += 2; /* CACHE_FLUSH */
 
-	if (flags & KGSL_CMD_FLAGS_EOF)
-		total_sizedwords += 2;
+	if (drawctxt) {
+		total_sizedwords += 3; /* global timestamp without cache
+					* flush for non-zero context */
+	}
+
+	if (flags & KGSL_CMD_FLAGS_WFI)
+		total_sizedwords += 2; /* WFI */
 
 	if (profile_ready)
 		total_sizedwords += 6;   /* space for pre_ib and post_ib */
@@ -648,8 +747,11 @@
 	if (flags & KGSL_CMD_FLAGS_PWRON_FIXUP)
 		total_sizedwords += 5;
 
-	ringcmds = adreno_ringbuffer_allocspace(rb, context, total_sizedwords);
-	if (!ringcmds)
+	ringcmds = adreno_ringbuffer_allocspace(rb, drawctxt, total_sizedwords);
+
+	if (IS_ERR(ringcmds))
+		return PTR_ERR(ringcmds);
+	if (ringcmds == NULL)
 		return -ENOSPC;
 
 	rcmd_gpu = rb->buffer_desc.gpuaddr
@@ -678,24 +780,9 @@
 
 	/* Add any IB required for profiling if it is enabled */
 	if (profile_ready)
-		adreno_profile_preib_processing(rb->device, context->base.id,
+		adreno_profile_preib_processing(rb->device, drawctxt->base.id,
 				&flags, &ringcmds, &rcmd_gpu);
 
-	/* always increment the global timestamp. once. */
-	rb->global_ts++;
-
-	if (KGSL_MEMSTORE_GLOBAL != context_id)
-		timestamp = context->timestamp;
-	else
-		timestamp = rb->global_ts;
-
-	/* scratchpad ts for fault tolerance */
-	GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu,
-		cp_type0_packet(adreno_getreg(adreno_dev,
-			ADRENO_REG_CP_TIMESTAMP), 1));
-	GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu,
-			rb->global_ts);
-
 	/* start-of-pipeline timestamp */
 	GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu,
 			cp_type3_packet(CP_MEM_WRITE, 2));
@@ -765,7 +852,7 @@
 		KGSL_MEMSTORE_OFFSET(context_id, eoptimestamp)));
 	GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu, timestamp);
 
-	if (KGSL_MEMSTORE_GLOBAL != context_id) {
+	if (drawctxt) {
 		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu,
 			cp_type3_packet(CP_MEM_WRITE, 2));
 		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu, (gpuaddr +
@@ -781,56 +868,13 @@
 		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu, CACHE_FLUSH);
 	}
 
-	if (context) {
-		/* Conditional execution based on memory values */
-		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu,
-			cp_type3_packet(CP_COND_EXEC, 4));
-		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu, (gpuaddr +
-			KGSL_MEMSTORE_OFFSET(
-				context_id, ts_cmp_enable)) >> 2);
-		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu, (gpuaddr +
-			KGSL_MEMSTORE_OFFSET(
-				context_id, ref_wait_ts)) >> 2);
-		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu, timestamp);
-		/* # of conditional command DWORDs */
-		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu, 8);
-
-		/* Clear the ts_cmp_enable for the context */
-		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu,
-			cp_type3_packet(CP_MEM_WRITE, 2));
-		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu, gpuaddr +
-			KGSL_MEMSTORE_OFFSET(
-				context_id, ts_cmp_enable));
-		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu, 0x0);
-
-		/* Clear the ts_cmp_enable for the global timestamp */
-		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu,
-			cp_type3_packet(CP_MEM_WRITE, 2));
-		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu, gpuaddr +
-			KGSL_MEMSTORE_OFFSET(
-				KGSL_MEMSTORE_GLOBAL, ts_cmp_enable));
-		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu, 0x0);
-
-		/* Trigger the interrupt */
+	if (drawctxt || (flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE)) {
 		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu,
 			cp_type3_packet(CP_INTERRUPT, 1));
 		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu,
 				CP_INT_CNTL__RB_INT_MASK);
 	}
 
-	/*
-	 * If per context timestamps are enabled and any of the kgsl
-	 * internal commands want INT to be generated trigger the INT
-	*/
-	if ((context) && (context->flags & CTXT_FLAGS_PER_CONTEXT_TS) &&
-		(flags & (KGSL_CMD_FLAGS_INTERNAL_ISSUE |
-		KGSL_CMD_FLAGS_GET_INT))) {
-			GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu,
-				cp_type3_packet(CP_INTERRUPT, 1));
-			GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu,
-				CP_INT_CNTL__RB_INT_MASK);
-	}
-
 	if (adreno_is_a3xx(adreno_dev)) {
 		/* Dummy set-constant to trigger context rollover */
 		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu,
@@ -840,10 +884,10 @@
 		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu, 0);
 	}
 
-	if (flags & KGSL_CMD_FLAGS_EOF) {
-		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu, cp_nop_packet(1));
+	if (flags & KGSL_CMD_FLAGS_WFI) {
 		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu,
-				KGSL_END_OF_FRAME_IDENTIFIER);
+			cp_type3_packet(CP_WAIT_FOR_IDLE, 1));
+		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu, 0x00000000);
 	}
 
 	adreno_ringbuffer_submit(rb);
@@ -861,14 +905,10 @@
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
 	struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;
 
-	if (device->state & KGSL_STATE_HUNG)
-		return kgsl_readtimestamp(device, KGSL_MEMSTORE_GLOBAL,
-					KGSL_TIMESTAMP_RETIRED);
-
 	flags |= KGSL_CMD_FLAGS_INTERNAL_ISSUE;
 
 	return adreno_ringbuffer_addcmds(rb, drawctxt, flags, cmds,
-							sizedwords);
+		sizedwords, 0);
 }
 
 static bool _parse_ibs(struct kgsl_device_private *dev_priv, uint gpuaddr,
@@ -1061,41 +1101,100 @@
 	return ret;
 }
 
-int
-adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv,
-				struct kgsl_context *context,
-				struct kgsl_ibdesc *ibdesc,
-				unsigned int numibs,
-				uint32_t *timestamp,
-				unsigned int flags)
+/**
+ * _ringbuffer_verify_ib() - parse an IB and verify that it is correct
+ * @dev_priv: Pointer to the process struct
+ * @ibdesc: Pointer to the IB descriptor
+ *
+ * This function only gets called if debugging is enabled - it walks the IB
+ * and does an additional level of parsing and verification beyond what the
+ * KGSL core does
+ */
+static inline bool _ringbuffer_verify_ib(struct kgsl_device_private *dev_priv,
+		struct kgsl_ibdesc *ibdesc)
 {
 	struct kgsl_device *device = dev_priv->device;
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
-	unsigned int *link = 0;
+
+	/* Check that the size of the IBs is under the allowable limit */
+	if (ibdesc->sizedwords == 0 || ibdesc->sizedwords > 0xFFFFF) {
+		KGSL_DRV_ERR(device, "Invalid IB size 0x%X\n",
+				ibdesc->sizedwords);
+		return false;
+	}
+
+	if (unlikely(adreno_dev->ib_check_level >= 1) &&
+		!_parse_ibs(dev_priv, ibdesc->gpuaddr, ibdesc->sizedwords)) {
+		KGSL_DRV_ERR(device, "Could not verify the IBs\n");
+		return false;
+	}
+
+	return true;
+}
+
+int
+adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv,
+				struct kgsl_context *context,
+				struct kgsl_cmdbatch *cmdbatch,
+				uint32_t *timestamp)
+{
+	struct kgsl_device *device = dev_priv->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(context);
+	int i, ret;
+
+	if (drawctxt->state == ADRENO_CONTEXT_STATE_INVALID)
+		return -EDEADLK;
+
+	/* Verify the IBs before they get queued */
+
+	for (i = 0; i < cmdbatch->ibcount; i++) {
+		if (!_ringbuffer_verify_ib(dev_priv, &cmdbatch->ibdesc[i]))
+			return -EINVAL;
+	}
+
+	/* wait for the suspend gate */
+	wait_for_completion(&device->cmdbatch_gate);
+
+	/* Queue the command in the ringbuffer */
+	ret = adreno_dispatcher_queue_cmd(adreno_dev, drawctxt, cmdbatch,
+		timestamp);
+
+	if (ret)
+		KGSL_DRV_ERR(device,
+			"adreno_dispatcher_queue_cmd returned %d\n", ret);
+
+	/*
+	 * Return -EPROTO if the device has faulted since the last time we
+	 * checked - userspace uses this to perform post-fault activities
+	 */
+	if (!ret && test_and_clear_bit(ADRENO_CONTEXT_FAULT, &drawctxt->priv))
+		ret = -EPROTO;
+
+	return ret;
+}
+
+/* adreno_ringbuffer_submitcmd - submit userspace IBs to the GPU */
+int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
+		struct kgsl_cmdbatch *cmdbatch)
+{
+	struct kgsl_device *device = &adreno_dev->dev;
+	struct kgsl_ibdesc *ibdesc;
+	unsigned int numibs;
+	unsigned int *link;
 	unsigned int *cmds;
-	unsigned int i, cmdflags;
-	struct adreno_context *drawctxt = NULL;
+	unsigned int i;
+	struct kgsl_context *context;
+	struct adreno_context *drawctxt;
 	unsigned int start_index = 0;
+	int flags = KGSL_CMD_FLAGS_NONE;
 	int ret;
 
-	if (device->state & KGSL_STATE_HUNG) {
-		ret = -EBUSY;
-		goto done;
-	}
-
-	if (!(adreno_dev->ringbuffer.flags & KGSL_FLAGS_STARTED) ||
-	      context == NULL || ibdesc == 0 || numibs == 0) {
-		ret = -EINVAL;
-		goto done;
-	}
+	context = cmdbatch->context;
 	drawctxt = ADRENO_CONTEXT(context);
 
-	if (drawctxt->flags & CTXT_FLAGS_GPU_HANG) {
-		ret = -EDEADLK;
-		goto done;
-	}
-
-	cmdflags = (flags & KGSL_CMD_FLAGS_EOF);
+	ibdesc = cmdbatch->ibdesc;
+	numibs = cmdbatch->ibcount;
 
 	/* process any profiling results that are available into the log_buf */
 	adreno_profile_process_results(device);
@@ -1104,17 +1203,21 @@
 	commands are stored in the first node of the IB chain. We can skip that
 	if a context switch hasn't occurred */
 
-	if (drawctxt->flags & CTXT_FLAGS_PREAMBLE &&
-		adreno_dev->drawctxt_active == drawctxt)
+	if ((drawctxt->base.flags & KGSL_CONTEXT_PREAMBLE) &&
+		!test_bit(CMDBATCH_FLAG_FORCE_PREAMBLE, &cmdbatch->priv) &&
+		(adreno_dev->drawctxt_active == drawctxt))
 		start_index = 1;
 
-	if (drawctxt->flags & CTXT_FLAGS_SKIP_EOF) {
-		if (flags & KGSL_CMD_FLAGS_EOF)
-			drawctxt->flags &= ~CTXT_FLAGS_SKIP_EOF;
-		if (start_index)
-			numibs = 1;
-		else
-			numibs = 0;
+	/*
+	 * In skip mode don't issue the draw IBs but keep all the other
+	 * accoutrements of a submission (including the interrupt) to keep
+	 * the accounting sane. Set start_index and numibs to 0 to just
+	 * generate the start and end markers and skip everything else
+	 */
+
+	if (test_bit(CMDBATCH_FLAG_SKIP, &cmdbatch->priv)) {
+		start_index = 0;
+		numibs = 0;
 	}
 
 	cmds = link = kzalloc(sizeof(unsigned int) * (numibs * 3 + 4),
@@ -1135,19 +1238,17 @@
 		*cmds++ = ibdesc[0].sizedwords;
 	}
 	for (i = start_index; i < numibs; i++) {
-		if (unlikely(adreno_dev->ib_check_level >= 1 &&
-		    !_parse_ibs(dev_priv, ibdesc[i].gpuaddr,
-				ibdesc[i].sizedwords))) {
-			ret = -EINVAL;
-			goto done;
-		}
 
-		if (ibdesc[i].sizedwords == 0) {
-			ret = -EINVAL;
-			goto done;
-		}
+		/*
+		 * Skip 0 sized IBs - these are presumed to have been removed
+		 * from consideration by the FT policy
+		 */
 
-		*cmds++ = CP_HDR_INDIRECT_BUFFER_PFD;
+		if (ibdesc[i].sizedwords == 0)
+			*cmds++ = cp_nop_packet(2);
+		else
+			*cmds++ = CP_HDR_INDIRECT_BUFFER_PFD;
+
 		*cmds++ = ibdesc[i].gpuaddr;
 		*cmds++ = ibdesc[i].sizedwords;
 	}
@@ -1155,257 +1256,57 @@
 	*cmds++ = cp_nop_packet(1);
 	*cmds++ = KGSL_END_OF_IB_IDENTIFIER;
 
-	kgsl_setstate(&device->mmu, context->id,
+	ret = kgsl_setstate(&device->mmu, context->id,
 		      kgsl_mmu_pt_get_flags(device->mmu.hwpagetable,
 					device->id));
 
-	adreno_drawctxt_switch(adreno_dev, drawctxt, flags);
-
-	if (test_and_clear_bit(ADRENO_DEVICE_PWRON, &adreno_dev->priv) &&
-		test_bit(ADRENO_DEVICE_PWRON_FIXUP, &adreno_dev->priv))
-			cmdflags |= KGSL_CMD_FLAGS_PWRON_FIXUP;
-
-	if (drawctxt->flags & CTXT_FLAGS_USER_GENERATED_TS) {
-		if (timestamp_cmp(drawctxt->timestamp, *timestamp) >= 0) {
-			KGSL_DRV_ERR(device,
-				"Invalid user generated ts <%d:0x%x>, "
-				"less than last issued ts <%d:0x%x>\n",
-				context->id, *timestamp, context->id,
-				drawctxt->timestamp);
-			return -ERANGE;
-		}
-		drawctxt->timestamp = *timestamp;
-	} else
-		drawctxt->timestamp++;
-
-	ret = adreno_ringbuffer_addcmds(&adreno_dev->ringbuffer,
-					drawctxt,
-					cmdflags,
-					&link[0], (cmds - link));
 	if (ret)
 		goto done;
 
-	if (drawctxt->flags & CTXT_FLAGS_PER_CONTEXT_TS)
-		*timestamp = drawctxt->timestamp;
-	else
-		*timestamp = adreno_dev->ringbuffer.global_ts;
+	ret = adreno_drawctxt_switch(adreno_dev, drawctxt, cmdbatch->flags);
+
+	/*
+	 * In the unlikely event of an error in the drawctxt switch,
+	 * treat it like a hang
+	 */
+	if (ret)
+		goto done;
+
+	if (test_bit(CMDBATCH_FLAG_WFI, &cmdbatch->priv))
+		flags = KGSL_CMD_FLAGS_WFI;
+
+	/*
+	 * For some targets, we need to execute a dummy shader operation after a
+	 * power collapse
+	 */
+
+	if (test_and_clear_bit(ADRENO_DEVICE_PWRON, &adreno_dev->priv) &&
+		test_bit(ADRENO_DEVICE_PWRON_FIXUP, &adreno_dev->priv))
+		flags |= KGSL_CMD_FLAGS_PWRON_FIXUP;
+
+	ret = adreno_ringbuffer_addcmds(&adreno_dev->ringbuffer,
+					drawctxt,
+					flags,
+					&link[0], (cmds - link),
+					cmdbatch->timestamp);
 
 #ifdef CONFIG_MSM_KGSL_CFF_DUMP
+	if (ret)
+		goto done;
 	/*
 	 * insert wait for idle after every IB1
 	 * this is conservative but works reliably and is ok
 	 * even for performance simulations
 	 */
-	adreno_idle(device);
+	ret = adreno_idle(device);
 #endif
 
-	/*
-	 * If context hung and recovered then return error so that the
-	 * application may handle it
-	 */
-	if (drawctxt->flags & CTXT_FLAGS_GPU_HANG_FT) {
-		drawctxt->flags &= ~CTXT_FLAGS_GPU_HANG_FT;
-		ret = -EPROTO;
-	} else
-		ret = 0;
-
 done:
 	device->pwrctrl.irq_last = 0;
-	kgsl_trace_issueibcmds(device, context ? context->id : 0, ibdesc,
-		numibs, *timestamp, flags, ret,
-		drawctxt ? drawctxt->type : 0);
+	kgsl_trace_issueibcmds(device, context->id, cmdbatch,
+		cmdbatch->timestamp, cmdbatch->flags, ret,
+		drawctxt->type);
 
 	kfree(link);
 	return ret;
 }
-
-static void _turn_preamble_on_for_ib_seq(struct adreno_ringbuffer *rb,
-				unsigned int rb_rptr)
-{
-	unsigned int temp_rb_rptr = rb_rptr;
-	unsigned int size = rb->buffer_desc.size;
-	unsigned int val[2];
-	int i = 0;
-	bool check = false;
-	bool cmd_start = false;
-
-	/* Go till the start of the ib sequence and turn on preamble */
-	while (temp_rb_rptr / sizeof(unsigned int) != rb->wptr) {
-		kgsl_sharedmem_readl(&rb->buffer_desc, &val[i], temp_rb_rptr);
-		if (check && KGSL_START_OF_IB_IDENTIFIER == val[i]) {
-			/* decrement i */
-			i = (i + 1) % 2;
-			if (val[i] == cp_nop_packet(4)) {
-				temp_rb_rptr = adreno_ringbuffer_dec_wrapped(
-						temp_rb_rptr, size);
-				kgsl_sharedmem_writel(rb->device,
-					&rb->buffer_desc,
-					temp_rb_rptr, cp_nop_packet(1));
-			}
-			KGSL_FT_INFO(rb->device,
-			"Turned preamble on at offset 0x%x\n",
-			temp_rb_rptr / 4);
-			break;
-		}
-		/* If you reach beginning of next command sequence then exit
-		 * First command encountered is the current one so don't break
-		 * on that. */
-		if (KGSL_CMD_IDENTIFIER == val[i]) {
-			if (cmd_start)
-				break;
-			cmd_start = true;
-		}
-
-		i = (i + 1) % 2;
-		if (1 == i)
-			check = true;
-		temp_rb_rptr = adreno_ringbuffer_inc_wrapped(temp_rb_rptr,
-								size);
-	}
-}
-
-void adreno_ringbuffer_extract(struct adreno_ringbuffer *rb,
-				struct adreno_ft_data *ft_data)
-{
-	struct kgsl_device *device = rb->device;
-	unsigned int rb_rptr = ft_data->start_of_replay_cmds;
-	unsigned int good_rb_idx = 0, bad_rb_idx = 0, temp_rb_idx = 0;
-	unsigned int last_good_cmd_end_idx = 0, last_bad_cmd_end_idx = 0;
-	unsigned int cmd_start_idx = 0;
-	unsigned int val1 = 0;
-	int copy_rb_contents = 0;
-	unsigned int temp_rb_rptr;
-	struct kgsl_context *k_ctxt;
-	struct adreno_context *a_ctxt;
-	unsigned int size = rb->buffer_desc.size;
-	unsigned int *temp_rb_buffer = ft_data->rb_buffer;
-	int *rb_size = &ft_data->rb_size;
-	unsigned int *bad_rb_buffer = ft_data->bad_rb_buffer;
-	int *bad_rb_size = &ft_data->bad_rb_size;
-	unsigned int *good_rb_buffer = ft_data->good_rb_buffer;
-	int *good_rb_size = &ft_data->good_rb_size;
-
-	/*
-	 * If the start index from where commands need to be copied is invalid
-	 * then no need to save off any commands
-	 */
-	if (0xFFFFFFFF == ft_data->start_of_replay_cmds)
-		return;
-
-	k_ctxt = kgsl_context_get(device, ft_data->context_id);
-
-	if (k_ctxt) {
-		a_ctxt = ADRENO_CONTEXT(k_ctxt);
-		if (a_ctxt->flags & CTXT_FLAGS_PREAMBLE)
-			_turn_preamble_on_for_ib_seq(rb, rb_rptr);
-		kgsl_context_put(k_ctxt);
-	}
-	k_ctxt = NULL;
-
-	/* Walk the rb from the context switch. Omit any commands
-	 * for an invalid context. */
-	while ((rb_rptr / sizeof(unsigned int)) != rb->wptr) {
-		kgsl_sharedmem_readl(&rb->buffer_desc, &val1, rb_rptr);
-
-		if (KGSL_CMD_IDENTIFIER == val1) {
-			/* Start is the NOP dword that comes before
-			 * KGSL_CMD_IDENTIFIER */
-			cmd_start_idx = temp_rb_idx - 1;
-			if ((copy_rb_contents) && (good_rb_idx))
-				last_good_cmd_end_idx = good_rb_idx - 1;
-			if ((!copy_rb_contents) && (bad_rb_idx))
-				last_bad_cmd_end_idx = bad_rb_idx - 1;
-		}
-
-		/* check for context switch indicator */
-		if (val1 == KGSL_CONTEXT_TO_MEM_IDENTIFIER) {
-			unsigned int temp_idx, val2;
-			/* increment by 3 to get to the context_id */
-			temp_rb_rptr = rb_rptr + (3 * sizeof(unsigned int)) %
-					size;
-			kgsl_sharedmem_readl(&rb->buffer_desc, &val2,
-						temp_rb_rptr);
-
-			/* if context switches to a context that did not cause
-			 * hang then start saving the rb contents as those
-			 * commands can be executed */
-			k_ctxt = kgsl_context_get(rb->device, val2);
-
-			if (k_ctxt) {
-				a_ctxt = ADRENO_CONTEXT(k_ctxt);
-
-			/* If we are changing to a good context and were not
-			 * copying commands then copy over commands to the good
-			 * context */
-			if (!copy_rb_contents && ((k_ctxt &&
-				!(a_ctxt->flags & CTXT_FLAGS_GPU_HANG)) ||
-				!k_ctxt)) {
-				for (temp_idx = cmd_start_idx;
-					temp_idx < temp_rb_idx;
-					temp_idx++)
-					good_rb_buffer[good_rb_idx++] =
-						temp_rb_buffer[temp_idx];
-				ft_data->last_valid_ctx_id = val2;
-				copy_rb_contents = 1;
-				/* remove the good commands from bad buffer */
-				bad_rb_idx = last_bad_cmd_end_idx;
-			} else if (copy_rb_contents && k_ctxt &&
-				(a_ctxt->flags & CTXT_FLAGS_GPU_HANG)) {
-
-				/* If we are changing back to a bad context
-				 * from good ctxt and were not copying commands
-				 * to bad ctxt then copy over commands to
-				 * the bad context */
-				for (temp_idx = cmd_start_idx;
-					temp_idx < temp_rb_idx;
-					temp_idx++)
-					bad_rb_buffer[bad_rb_idx++] =
-						temp_rb_buffer[temp_idx];
-				/* If we are changing to bad context then
-				 * remove the dwords we copied for this
-				 * sequence from the good buffer */
-				good_rb_idx = last_good_cmd_end_idx;
-				copy_rb_contents = 0;
-			}
-			}
-			kgsl_context_put(k_ctxt);
-		}
-
-		if (copy_rb_contents)
-			good_rb_buffer[good_rb_idx++] = val1;
-		else
-			bad_rb_buffer[bad_rb_idx++] = val1;
-
-		/* Copy both good and bad commands to temp buffer */
-		temp_rb_buffer[temp_rb_idx++] = val1;
-
-		rb_rptr = adreno_ringbuffer_inc_wrapped(rb_rptr, size);
-	}
-	*good_rb_size = good_rb_idx;
-	*bad_rb_size = bad_rb_idx;
-	*rb_size = temp_rb_idx;
-}
-
-void
-adreno_ringbuffer_restore(struct adreno_ringbuffer *rb, unsigned int *rb_buff,
-			int num_rb_contents)
-{
-	int i;
-	unsigned int *ringcmds;
-	unsigned int rcmd_gpu;
-	struct adreno_device *adreno_dev = ADRENO_DEVICE(rb->device);
-
-	if (!num_rb_contents)
-		return;
-
-	if (num_rb_contents > (rb->buffer_desc.size - rb->wptr)) {
-		adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_RPTR, 0);
-		BUG_ON(num_rb_contents > rb->buffer_desc.size);
-	}
-	ringcmds = (unsigned int *)rb->buffer_desc.hostptr + rb->wptr;
-	rcmd_gpu = rb->buffer_desc.gpuaddr + sizeof(unsigned int) * rb->wptr;
-	for (i = 0; i < num_rb_contents; i++)
-		GSL_RB_WRITE(rb->device, ringcmds, rcmd_gpu, rb_buff[i]);
-	rb->wptr += num_rb_contents;
-	adreno_ringbuffer_submit(rb);
-}
diff --git a/drivers/gpu/msm/adreno_ringbuffer.h b/drivers/gpu/msm/adreno_ringbuffer.h
index 9634e32..eee4127 100644
--- a/drivers/gpu/msm/adreno_ringbuffer.h
+++ b/drivers/gpu/msm/adreno_ringbuffer.h
@@ -27,7 +27,6 @@
 
 struct kgsl_device;
 struct kgsl_device_private;
-struct adreno_ft_data;
 
 #define GSL_RB_MEMPTRS_SCRATCH_COUNT	 8
 struct kgsl_rbmemptrs {
@@ -99,16 +98,17 @@
 
 int adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv,
 				struct kgsl_context *context,
-				struct kgsl_ibdesc *ibdesc,
-				unsigned int numibs,
-				uint32_t *timestamp,
-				unsigned int flags);
+				struct kgsl_cmdbatch *cmdbatch,
+				uint32_t *timestamp);
+
+int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
+		struct kgsl_cmdbatch *cmdbatch);
 
 int adreno_ringbuffer_init(struct kgsl_device *device);
 
 int adreno_ringbuffer_warm_start(struct adreno_ringbuffer *rb);
 
-int adreno_ringbuffer_start(struct adreno_ringbuffer *rb);
+int adreno_ringbuffer_cold_start(struct adreno_ringbuffer *rb);
 
 void adreno_ringbuffer_stop(struct adreno_ringbuffer *rb);
 
@@ -124,13 +124,6 @@
 
 void kgsl_cp_intrcallback(struct kgsl_device *device);
 
-void adreno_ringbuffer_extract(struct adreno_ringbuffer *rb,
-				struct adreno_ft_data *ft_data);
-
-void
-adreno_ringbuffer_restore(struct adreno_ringbuffer *rb, unsigned int *rb_buff,
-			int num_rb_contents);
-
 unsigned int *adreno_ringbuffer_allocspace(struct adreno_ringbuffer *rb,
 						struct adreno_context *context,
 						unsigned int numcmds);
diff --git a/drivers/gpu/msm/adreno_trace.c b/drivers/gpu/msm/adreno_trace.c
new file mode 100644
index 0000000..607ba8c
--- /dev/null
+++ b/drivers/gpu/msm/adreno_trace.c
@@ -0,0 +1,18 @@
+/* Copyright (c) 2013, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include "adreno.h"
+
+/* Instantiate tracepoints */
+#define CREATE_TRACE_POINTS
+#include "adreno_trace.h"
diff --git a/drivers/gpu/msm/adreno_trace.h b/drivers/gpu/msm/adreno_trace.h
new file mode 100644
index 0000000..6079b61
--- /dev/null
+++ b/drivers/gpu/msm/adreno_trace.h
@@ -0,0 +1,270 @@
+/* Copyright (c) 2013, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#if !defined(_ADRENO_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _ADRENO_TRACE_H
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kgsl
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE adreno_trace
+
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(adreno_cmdbatch_queued,
+	TP_PROTO(struct kgsl_cmdbatch *cmdbatch, unsigned int queued),
+	TP_ARGS(cmdbatch, queued),
+	TP_STRUCT__entry(
+		__field(unsigned int, id)
+		__field(unsigned int, timestamp)
+		__field(unsigned int, queued)
+		__field(unsigned int, flags)
+	),
+	TP_fast_assign(
+		__entry->id = cmdbatch->context->id;
+		__entry->timestamp = cmdbatch->timestamp;
+		__entry->queued = queued;
+		__entry->flags = cmdbatch->flags;
+	),
+	TP_printk(
+		"ctx=%u ts=%u queued=%u flags=%s",
+			__entry->id, __entry->timestamp, __entry->queued,
+			__entry->flags ? __print_flags(__entry->flags, "|",
+				{ KGSL_CONTEXT_SYNC, "SYNC" },
+				{ KGSL_CONTEXT_END_OF_FRAME, "EOF" })
+				: "none"
+	)
+);
+
+DECLARE_EVENT_CLASS(adreno_cmdbatch_template,
+	TP_PROTO(struct kgsl_cmdbatch *cmdbatch, int inflight),
+	TP_ARGS(cmdbatch, inflight),
+	TP_STRUCT__entry(
+		__field(unsigned int, id)
+		__field(unsigned int, timestamp)
+		__field(unsigned int, inflight)
+	),
+	TP_fast_assign(
+		__entry->id = cmdbatch->context->id;
+		__entry->timestamp = cmdbatch->timestamp;
+		__entry->inflight = inflight;
+	),
+	TP_printk(
+		"ctx=%u ts=%u inflight=%u",
+			__entry->id, __entry->timestamp,
+			__entry->inflight
+	)
+);
+
+DEFINE_EVENT(adreno_cmdbatch_template, adreno_cmdbatch_submitted,
+	TP_PROTO(struct kgsl_cmdbatch *cmdbatch, int inflight),
+	TP_ARGS(cmdbatch, inflight)
+);
+
+TRACE_EVENT(adreno_cmdbatch_retired,
+	TP_PROTO(struct kgsl_cmdbatch *cmdbatch, int inflight),
+	TP_ARGS(cmdbatch, inflight),
+	TP_STRUCT__entry(
+		__field(unsigned int, id)
+		__field(unsigned int, timestamp)
+		__field(unsigned int, inflight)
+		__field(unsigned int, recovery)
+	),
+	TP_fast_assign(
+		__entry->id = cmdbatch->context->id;
+		__entry->timestamp = cmdbatch->timestamp;
+		__entry->inflight = inflight;
+		__entry->recovery = cmdbatch->fault_recovery;
+	),
+	TP_printk(
+		"ctx=%u ts=%u inflight=%u recovery=%s",
+			__entry->id, __entry->timestamp,
+			__entry->inflight,
+			__entry->recovery ?
+				__print_flags(__entry->recovery, "|",
+				ADRENO_FT_TYPES) : "none"
+	)
+);
+
+TRACE_EVENT(adreno_cmdbatch_fault,
+	TP_PROTO(struct kgsl_cmdbatch *cmdbatch, unsigned int fault),
+	TP_ARGS(cmdbatch, fault),
+	TP_STRUCT__entry(
+		__field(unsigned int, id)
+		__field(unsigned int, timestamp)
+		__field(unsigned int, fault)
+	),
+	TP_fast_assign(
+		__entry->id = cmdbatch->context->id;
+		__entry->timestamp = cmdbatch->timestamp;
+		__entry->fault = fault;
+	),
+	TP_printk(
+		"ctx=%u ts=%u type=%s",
+			__entry->id, __entry->timestamp,
+			__print_symbolic(__entry->fault,
+				{ 0, "none" },
+				{ ADRENO_SOFT_FAULT, "soft" },
+				{ ADRENO_HARD_FAULT, "hard" },
+				{ ADRENO_TIMEOUT_FAULT, "timeout" })
+	)
+);
+
+TRACE_EVENT(adreno_cmdbatch_recovery,
+	TP_PROTO(struct kgsl_cmdbatch *cmdbatch, unsigned int action),
+	TP_ARGS(cmdbatch, action),
+	TP_STRUCT__entry(
+		__field(unsigned int, id)
+		__field(unsigned int, timestamp)
+		__field(unsigned int, action)
+	),
+	TP_fast_assign(
+		__entry->id = cmdbatch->context->id;
+		__entry->timestamp = cmdbatch->timestamp;
+		__entry->action = action;
+	),
+	TP_printk(
+		"ctx=%u ts=%u action=%s",
+			__entry->id, __entry->timestamp,
+			__print_symbolic(__entry->action, ADRENO_FT_TYPES)
+	)
+);
+
+DECLARE_EVENT_CLASS(adreno_drawctxt_template,
+	TP_PROTO(struct adreno_context *drawctxt),
+	TP_ARGS(drawctxt),
+	TP_STRUCT__entry(
+		__field(unsigned int, id)
+	),
+	TP_fast_assign(
+		__entry->id = drawctxt->base.id;
+	),
+	TP_printk("ctx=%u", __entry->id)
+);
+
+DEFINE_EVENT(adreno_drawctxt_template, adreno_drawctxt_sleep,
+	TP_PROTO(struct adreno_context *drawctxt),
+	TP_ARGS(drawctxt)
+);
+
+DEFINE_EVENT(adreno_drawctxt_template, adreno_drawctxt_wake,
+	TP_PROTO(struct adreno_context *drawctxt),
+	TP_ARGS(drawctxt)
+);
+
+DEFINE_EVENT(adreno_drawctxt_template, dispatch_queue_context,
+	TP_PROTO(struct adreno_context *drawctxt),
+	TP_ARGS(drawctxt)
+);
+
+DEFINE_EVENT(adreno_drawctxt_template, adreno_drawctxt_invalidate,
+	TP_PROTO(struct adreno_context *drawctxt),
+	TP_ARGS(drawctxt)
+);
+
+TRACE_EVENT(adreno_drawctxt_wait_start,
+	TP_PROTO(unsigned int id, unsigned int ts),
+	TP_ARGS(id, ts),
+	TP_STRUCT__entry(
+		__field(unsigned int, id)
+		__field(unsigned int, ts)
+	),
+	TP_fast_assign(
+		__entry->id = id;
+		__entry->ts = ts;
+	),
+	TP_printk(
+		"ctx=%u ts=%u",
+			__entry->id, __entry->ts
+	)
+);
+
+TRACE_EVENT(adreno_drawctxt_wait_done,
+	TP_PROTO(unsigned int id, unsigned int ts, int status),
+	TP_ARGS(id, ts, status),
+	TP_STRUCT__entry(
+		__field(unsigned int, id)
+		__field(unsigned int, ts)
+		__field(int, status)
+	),
+	TP_fast_assign(
+		__entry->id = id;
+		__entry->ts = ts;
+		__entry->status = status;
+	),
+	TP_printk(
+		"ctx=%u ts=%u status=%d",
+			__entry->id, __entry->ts, __entry->status
+	)
+);
+
+TRACE_EVENT(adreno_drawctxt_switch,
+	TP_PROTO(struct adreno_context *oldctx,
+		struct adreno_context *newctx,
+		unsigned int flags),
+	TP_ARGS(oldctx, newctx, flags),
+	TP_STRUCT__entry(
+		__field(unsigned int, oldctx)
+		__field(unsigned int, newctx)
+		__field(unsigned int, flags)
+	),
+	TP_fast_assign(
+		__entry->oldctx = oldctx ? oldctx->base.id : 0;
+		__entry->newctx = newctx ? newctx->base.id : 0;
+		__entry->flags = flags;
+	),
+	TP_printk(
+		"oldctx=%u newctx=%u flags=%X",
+			__entry->oldctx, __entry->newctx, __entry->flags
+	)
+);
+
+TRACE_EVENT(adreno_gpu_fault,
+	TP_PROTO(unsigned int ctx, unsigned int ts,
+		unsigned int status, unsigned int rptr, unsigned int wptr,
+		unsigned int ib1base, unsigned int ib1size,
+		unsigned int ib2base, unsigned int ib2size),
+	TP_ARGS(ctx, ts, status, rptr, wptr, ib1base, ib1size, ib2base,
+		ib2size),
+	TP_STRUCT__entry(
+		__field(unsigned int, ctx)
+		__field(unsigned int, ts)
+		__field(unsigned int, status)
+		__field(unsigned int, rptr)
+		__field(unsigned int, wptr)
+		__field(unsigned int, ib1base)
+		__field(unsigned int, ib1size)
+		__field(unsigned int, ib2base)
+		__field(unsigned int, ib2size)
+	),
+	TP_fast_assign(
+		__entry->ctx = ctx;
+		__entry->ts = ts;
+		__entry->status = status;
+		__entry->rptr = rptr;
+		__entry->wptr = wptr;
+		__entry->ib1base = ib1base;
+		__entry->ib1size = ib1size;
+		__entry->ib2base = ib2base;
+		__entry->ib2size = ib2size;
+	),
+	TP_printk("ctx=%d ts=%d status=%X RB=%X/%X IB1=%X/%X IB2=%X/%X",
+		__entry->ctx, __entry->ts, __entry->status, __entry->wptr,
+		__entry->rptr, __entry->ib1base, __entry->ib1size,
+		__entry->ib2base, __entry->ib2size)
+);
+
+#endif /* _ADRENO_TRACE_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
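
Each TRACE_EVENT() above expands into a trace_<name>() helper once CREATE_TRACE_POINTS is defined, as adreno_trace.c does. A minimal sketch of firing one from driver code; the wrapper function is hypothetical:

	#include "adreno_trace.h"

	/* Sketch: emit the queued tracepoint for a command batch */
	static void example_trace_queued(struct kgsl_cmdbatch *cmdbatch,
					unsigned int queued_count)
	{
		trace_adreno_cmdbatch_queued(cmdbatch, queued_count);
	}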
diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c
index 298b36e..fe6b34c 100644
--- a/drivers/gpu/msm/kgsl.c
+++ b/drivers/gpu/msm/kgsl.c
@@ -14,6 +14,7 @@
 #include <linux/fb.h>
 #include <linux/file.h>
 #include <linux/fs.h>
+#include <linux/list.h>
 #include <linux/debugfs.h>
 #include <linux/uaccess.h>
 #include <linux/interrupt.h>
@@ -61,60 +62,14 @@
 
 static void kgsl_mem_entry_detach_process(struct kgsl_mem_entry *entry);
 
-/**
- * kgsl_hang_check() - Check for GPU hang
- * data: KGSL device structure
- *
- * This function is called every KGSL_TIMEOUT_PART time when
- * GPU is active to check for hang. If a hang is detected we
- * trigger fault tolerance.
- */
-void kgsl_hang_check(struct work_struct *work)
-{
-	struct kgsl_device *device = container_of(work, struct kgsl_device,
-							hang_check_ws);
-	static unsigned int prev_reg_val[FT_DETECT_REGS_COUNT];
-
-	mutex_lock(&device->mutex);
-
-	if (device->state == KGSL_STATE_ACTIVE) {
-
-		/* Check to see if the GPU is hung */
-		if (adreno_ft_detect(device, prev_reg_val))
-			adreno_dump_and_exec_ft(device);
-
-		mod_timer(&device->hang_timer,
-			(jiffies + msecs_to_jiffies(KGSL_TIMEOUT_PART)));
-	}
-
-	mutex_unlock(&device->mutex);
-}
-
-/**
- * hang_timer() - Hang timer function
- * data: KGSL device structure
- *
- * This function is called when hang timer expires, in this
- * function we check if GPU is in active state and queue the
- * work on device workqueue to check for the hang. We restart
- * the timer after KGSL_TIMEOUT_PART time.
- */
-void hang_timer(unsigned long data)
-{
-	struct kgsl_device *device = (struct kgsl_device *) data;
-
-	if (device->state == KGSL_STATE_ACTIVE) {
-		/* Have work run in a non-interrupt context. */
-		queue_work(device->work_queue, &device->hang_check_ws);
-	}
-}
-
+static void
+kgsl_put_process_private(struct kgsl_device *device,
+			 struct kgsl_process_private *private);
 /**
  * kgsl_trace_issueibcmds() - Call trace_issueibcmds by proxy
  * device: KGSL device
  * id: ID of the context submitting the command
- * ibdesc: Pointer to the list of IB descriptors
- * numib: Number of IBs in the list
+ * cmdbatch: Pointer to kgsl_cmdbatch describing these commands
  * timestamp: Timestamp assigned to the command batch
  * flags: Flags sent by the user
  * result: Result of the submission attempt
@@ -124,11 +79,11 @@
  * GPU specific modules.
  */
 void kgsl_trace_issueibcmds(struct kgsl_device *device, int id,
-		struct kgsl_ibdesc *ibdesc, int numibs,
+		struct kgsl_cmdbatch *cmdbatch,
 		unsigned int timestamp, unsigned int flags,
 		int result, unsigned int type)
 {
-	trace_kgsl_issueibcmds(device, id, ibdesc, numibs,
+	trace_kgsl_issueibcmds(device, id, cmdbatch,
 		timestamp, flags, result, type);
 }
 EXPORT_SYMBOL(kgsl_trace_issueibcmds);
@@ -496,9 +451,16 @@
 	}
 
 	kref_init(&context->refcount);
+	/*
+	 * Get a refernce to the process private so its not destroyed, until
+	 * Get a reference to the process private so it is not destroyed
+	 * until the context is destroyed. This also prevents the pagetable
+	 * from being destroyed
+	if (!kref_get_unless_zero(&dev_priv->process_priv->refcount))
+		goto fail_free_id;
 	context->device = dev_priv->device;
-	context->pagetable = dev_priv->process_priv->pagetable;
 	context->dev_priv = dev_priv;
+	context->proc_priv = dev_priv->process_priv;
 	context->pid = task_tgid_nr(current);
 	context->tid = task_pid_nr(current);
 
@@ -530,8 +492,8 @@
 EXPORT_SYMBOL(kgsl_context_init);
 
 /**
- * kgsl_context_detach - Release the "master" context reference
- * @context - The context that will be detached
+ * kgsl_context_detach() - Release the "master" context reference
+ * @context: The context that will be detached
  *
  * This is called when a context becomes unusable, because userspace
  * has requested for it to be destroyed. The context itself may
@@ -540,14 +502,12 @@
  * detached by checking the KGSL_CONTEXT_DETACHED bit in
  * context->priv.
  */
-void
-kgsl_context_detach(struct kgsl_context *context)
+int kgsl_context_detach(struct kgsl_context *context)
 {
-	struct kgsl_device *device;
-	if (context == NULL)
-		return;
+	int ret;
 
-	device = context->device;
+	if (context == NULL)
+		return -EINVAL;
 
 	/*
 	 * Mark the context as detached to keep others from using
@@ -555,19 +515,22 @@
 	 * we don't try to detach twice.
 	 */
 	if (test_and_set_bit(KGSL_CONTEXT_DETACHED, &context->priv))
-		return;
+		return -EINVAL;
 
-	trace_kgsl_context_detach(device, context);
+	trace_kgsl_context_detach(context->device, context);
 
-	device->ftbl->drawctxt_detach(context);
+	ret = context->device->ftbl->drawctxt_detach(context);
+
 	/*
 	 * Cancel events after the device-specific context is
 	 * detached, to avoid possibly freeing memory while
 	 * it is still in use by the GPU.
 	 */
-	kgsl_context_cancel_events(device, context);
+	kgsl_context_cancel_events(context->device, context);
 
 	kgsl_context_put(context);
+
+	return ret;
 }
 
 void
@@ -579,6 +542,8 @@
 
 	trace_kgsl_context_destroy(device, context);
 
+	BUG_ON(!kgsl_context_detached(context));
+
 	write_lock(&device->context_lock);
 	if (context->id != KGSL_CONTEXT_INVALID) {
 		idr_remove(&device->context_idr, context->id);
@@ -586,6 +551,8 @@
 	}
 	write_unlock(&device->context_lock);
 	kgsl_sync_timeline_destroy(context);
+	kgsl_put_process_private(device,
+				context->proc_priv);
 
 	device->ftbl->drawctxt_destroy(context);
 }
@@ -649,11 +616,14 @@
 	policy_saved = device->pwrscale.policy;
 	device->pwrscale.policy = NULL;
 	kgsl_pwrctrl_request_state(device, KGSL_STATE_SUSPEND);
-	/*
-	 * Make sure no user process is waiting for a timestamp
-	 * before supending.
-	 */
-	kgsl_active_count_wait(device, 0);
+
+	/* Tell the device to drain the submission queue */
+	device->ftbl->drain(device);
+
+	/* Wait for the active count to hit zero */
+	status = kgsl_active_count_wait(device, 0);
+	if (status)
+		goto end;
 
 	/*
 	 * An interrupt could have snuck in and requested NAP in
@@ -663,13 +633,10 @@
 
 	/* Don't let the timer wake us during suspended sleep. */
 	del_timer_sync(&device->idle_timer);
-	del_timer_sync(&device->hang_timer);
 	switch (device->state) {
 		case KGSL_STATE_INIT:
 			break;
 		case KGSL_STATE_ACTIVE:
-			/* Wait for the device to become idle */
-			device->ftbl->idle(device);
 		case KGSL_STATE_NAP:
 		case KGSL_STATE_SLEEP:
 			/* make sure power is on to stop the device */
@@ -696,6 +663,12 @@
 	status = 0;
 
 end:
+	if (status) {
+		/* On failure, re-resume normal activity */
+		if (device->ftbl->resume)
+			device->ftbl->resume(device);
+	}
+
 	mutex_unlock(&device->mutex);
 	KGSL_PWR_WARN(device, "suspend end\n");
 	return status;
@@ -727,6 +700,10 @@
 	}
 	kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE);
 
+	/* Call the GPU specific resume function */
+	if (device->ftbl->resume)
+		device->ftbl->resume(device);
+
 	mutex_unlock(&device->mutex);
 	KGSL_PWR_WARN(device, "resume end\n");
 	return 0;
@@ -953,21 +930,16 @@
 	device->open_count--;
 	if (device->open_count == 0) {
 
-		/* Wait for the active count to go to 1 */
-		kgsl_active_count_wait(device, 1);
+		/* Wait for the active count to go to 0 */
+		kgsl_active_count_wait(device, 0);
 
 		/* Fail if the wait times out */
-		BUG_ON(atomic_read(&device->active_cnt) > 1);
+		BUG_ON(atomic_read(&device->active_cnt) > 0);
 
+		/* Force power on to do the stop */
+		kgsl_pwrctrl_enable(device);
 		result = device->ftbl->stop(device);
 		kgsl_pwrctrl_set_state(device, KGSL_STATE_INIT);
-		/*
-		 * active_cnt special case: we just stopped the device,
-		 * so no need to use kgsl_active_count_put()
-		 */
-		atomic_dec(&device->active_cnt);
-	} else {
-		kgsl_active_count_put(device);
 	}
 	return result;
 
@@ -986,7 +958,6 @@
 	filep->private_data = NULL;
 
 	mutex_lock(&device->mutex);
-	kgsl_active_count_get(device);
 
 	while (1) {
 		read_lock(&device->context_lock);
@@ -996,8 +967,17 @@
 		if (context == NULL)
 			break;
 
-		if (context->dev_priv == dev_priv)
-			kgsl_context_detach(context);
+		if (context->dev_priv == dev_priv) {
+			/*
+			 * Hold a reference to the context in case somebody
+			 * tries to put it while we are detaching
+			 */
+
+			if (_kgsl_context_get(context)) {
+				kgsl_context_detach(context);
+				kgsl_context_put(context);
+			}
+		}
 
 		next = next + 1;
 	}
@@ -1011,6 +991,7 @@
 
 	result = kgsl_close_device(device);
 	mutex_unlock(&device->mutex);
+
 	kfree(dev_priv);
 
 	kgsl_put_process_private(device, private);
@@ -1043,7 +1024,6 @@
 		 * Make sure the gates are open, so they don't block until
 		 * we start suspend or FT.
 		 */
-		complete_all(&device->ft_gate);
 		complete_all(&device->hwaccess_gate);
 		kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE);
 		kgsl_active_count_put(device);
@@ -1162,7 +1142,8 @@
 		entry = rb_entry(node, struct kgsl_mem_entry, node);
 
 		if (kgsl_gpuaddr_in_memdesc(&entry->memdesc, gpuaddr, size)) {
-			kgsl_mem_entry_get(entry);
+			if (!kgsl_mem_entry_get(entry))
+				break;
 			spin_unlock(&private->mem_lock);
 			return entry;
 		}
@@ -1262,14 +1243,17 @@
 static inline struct kgsl_mem_entry * __must_check
 kgsl_sharedmem_find_id(struct kgsl_process_private *process, unsigned int id)
 {
+	int result = 0;
 	struct kgsl_mem_entry *entry;
 
 	spin_lock(&process->mem_lock);
 	entry = idr_find(&process->mem_idr, id);
 	if (entry)
-		kgsl_mem_entry_get(entry);
+		result = kgsl_mem_entry_get(entry);
 	spin_unlock(&process->mem_lock);
 
+	if (!result)
+		return NULL;
 	return entry;
 }
 
@@ -1439,93 +1423,632 @@
 	return result;
 }
 
-static long kgsl_ioctl_rb_issueibcmds(struct kgsl_device_private *dev_priv,
-				      unsigned int cmd, void *data)
-{
-	int result = 0;
-	int i = 0;
-	struct kgsl_ringbuffer_issueibcmds *param = data;
-	struct kgsl_ibdesc *ibdesc;
-	struct kgsl_context *context;
+/*
+ * KGSL command batch management
+ * A command batch is a single submission from userland.  The cmdbatch
+ * encapsulates everything about the submission : command buffers, flags and
+ * sync points.
+ *
+ * Sync points are events that need to expire before the
+ * cmdbatch can be queued to the hardware. For each sync point a
+ * kgsl_cmdbatch_sync_event struct is created and added to a list in the
+ * cmdbatch. There can be multiple types of events both internal ones (GPU
+ * events) and external triggers. As the events expire the struct is deleted
+ * from the list. The GPU will submit the command batch as soon as the list
+ * goes empty indicating that all the sync points have been met.
+ */
 
-	context = kgsl_context_get_owner(dev_priv, param->drawctxt_id);
-	if (context == NULL) {
-		result = -EINVAL;
+/**
+ * struct kgsl_cmdbatch_sync_event
+ * @type: Syncpoint type
+ * @node: Local list node for the cmdbatch sync point list
+ * @cmdbatch: Pointer to the cmdbatch that owns the sync event
+ * @context: Pointer to the KGSL context that owns the cmdbatch
+ * @timestamp: Pending timestamp for the event
+ * @handle: Pointer to a sync fence handle
+ * @device: Pointer to the KGSL device
+ * @lock: Spin lock protecting @handle against the async fence callback
+ */
+struct kgsl_cmdbatch_sync_event {
+	int type;
+	struct list_head node;
+	struct kgsl_cmdbatch *cmdbatch;
+	struct kgsl_context *context;
+	unsigned int timestamp;
+	struct kgsl_sync_fence_waiter *handle;
+	struct kgsl_device *device;
+	spinlock_t lock;
+};
+
+/**
+ * kgsl_cmdbatch_destroy_object() - Destroy a cmdbatch object
+ * @kref: Pointer to the kref structure for this object
+ *
+ * Actually destroy a command batch object.  Called from kgsl_cmdbatch_put
+ */
+void kgsl_cmdbatch_destroy_object(struct kref *kref)
+{
+	struct kgsl_cmdbatch *cmdbatch = container_of(kref,
+		struct kgsl_cmdbatch, refcount);
+
+	kgsl_context_put(cmdbatch->context);
+	kfree(cmdbatch->ibdesc);
+
+	kfree(cmdbatch);
+}
+EXPORT_SYMBOL(kgsl_cmdbatch_destroy_object);
+
+/*
+ * a generic function to retire a pending sync event and (possibly)
+ * kick the dispatcher
+ */
+static void kgsl_cmdbatch_sync_expire(struct kgsl_device *device,
+	struct kgsl_cmdbatch_sync_event *event)
+{
+	int sched = 0;
+
+	spin_lock(&event->cmdbatch->lock);
+	list_del(&event->node);
+	sched = list_empty(&event->cmdbatch->synclist) ? 1 : 0;
+	spin_unlock(&event->cmdbatch->lock);
+
+	/*
+	 * if this is the last event in the list then tell
+	 * the GPU device that the cmdbatch can be submitted
+	 */
+
+	if (sched && device->ftbl->drawctxt_sched)
+		device->ftbl->drawctxt_sched(device, event->cmdbatch->context);
+}
+
+/*
+ * This function is called by the GPU event when the sync event timestamp
+ * expires
+ */
+static void kgsl_cmdbatch_sync_func(struct kgsl_device *device, void *priv,
+		u32 id, u32 timestamp, u32 type)
+{
+	struct kgsl_cmdbatch_sync_event *event = priv;
+
+	kgsl_cmdbatch_sync_expire(device, event);
+
+	kgsl_context_put(event->context);
+	kgsl_cmdbatch_put(event->cmdbatch);
+
+	kfree(event);
+}
+
+/**
+ * kgsl_cmdbatch_destroy() - Destroy a cmdbatch structure
+ * @cmdbatch: Pointer to the command batch object to destroy
+ *
+ * Start the process of destroying a command batch.  Cancel any pending events
+ * and decrement the refcount.
+ */
+void kgsl_cmdbatch_destroy(struct kgsl_cmdbatch *cmdbatch)
+{
+	struct kgsl_cmdbatch_sync_event *event, *tmp;
+
+	spin_lock(&cmdbatch->lock);
+
+	/* Delete any pending sync points for this command batch */
+	list_for_each_entry_safe(event, tmp, &cmdbatch->synclist, node) {
+
+		if (event->type == KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP) {
+			/* Cancel the event if it still exists */
+			kgsl_cancel_event(cmdbatch->device, event->context,
+				event->timestamp, kgsl_cmdbatch_sync_func,
+				event);
+		} else if (event->type == KGSL_CMD_SYNCPOINT_TYPE_FENCE) {
+			if (kgsl_sync_fence_async_cancel(event->handle)) {
+				list_del(&event->node);
+				kfree(event);
+				kgsl_cmdbatch_put(cmdbatch);
+			}
+		}
+	}
+
+	spin_unlock(&cmdbatch->lock);
+	kgsl_cmdbatch_put(cmdbatch);
+}
+EXPORT_SYMBOL(kgsl_cmdbatch_destroy);
+
+/*
+ * A callback that gets registered with kgsl_sync_fence_async_wait and is fired
+ * when a fence is expired
+ */
+static void kgsl_cmdbatch_sync_fence_func(void *priv)
+{
+	struct kgsl_cmdbatch_sync_event *event = priv;
+
+	spin_lock(&event->lock);
+	kgsl_cmdbatch_sync_expire(event->device, event);
+	kgsl_cmdbatch_put(event->cmdbatch);
+	spin_unlock(&event->lock);
+	kfree(event);
+}
+
+/**
+ * kgsl_cmdbatch_add_sync_fence() - Add a new sync fence syncpoint
+ * @device: KGSL device
+ * @cmdbatch: KGSL cmdbatch to add the sync point to
+ * @priv: Private structure passed by the user
+ *
+ * Add a new fence syncpoint to the cmdbatch.
+ */
+static int kgsl_cmdbatch_add_sync_fence(struct kgsl_device *device,
+		struct kgsl_cmdbatch *cmdbatch, void *priv)
+{
+	struct kgsl_cmd_syncpoint_fence *sync = priv;
+	struct kgsl_cmdbatch_sync_event *event;
+
+	event = kzalloc(sizeof(*event), GFP_KERNEL);
+
+	if (event == NULL)
+		return -ENOMEM;
+
+	kref_get(&cmdbatch->refcount);
+
+	event->type = KGSL_CMD_SYNCPOINT_TYPE_FENCE;
+	event->cmdbatch = cmdbatch;
+	event->device = device;
+	spin_lock_init(&event->lock);
+
+	/*
+	 * Add it to the list first to account for the possibility that the
+	 * callback will happen immediately after the call to
+	 * kgsl_sync_fence_async_wait
+	 */
+
+	spin_lock(&cmdbatch->lock);
+	list_add(&event->node, &cmdbatch->synclist);
+	spin_unlock(&cmdbatch->lock);
+
+	/*
+	 * There is a distinct race condition that can occur if the fence
+	 * callback is fired before the function has a chance to return.  The
+	 * event struct would be freed before we could write event->handle and
+	 * event struct would be freed before we could write event->handle and
+	 * hilarity would ensue.  Guard against this by protecting the call to
+	 */
+
+	spin_lock(&event->lock);
+
+	event->handle = kgsl_sync_fence_async_wait(sync->fd,
+		kgsl_cmdbatch_sync_fence_func, event);
+
+	if (IS_ERR_OR_NULL(event->handle)) {
+		int ret = event->handle ? PTR_ERR(event->handle) : -EINVAL;
+
+		spin_lock(&cmdbatch->lock);
+		list_del(&event->node);
+		spin_unlock(&cmdbatch->lock);
+
+		kgsl_cmdbatch_put(cmdbatch);
+		spin_unlock(&event->lock);
+		kfree(event);
+
+		return ret;
+	}
+
+	spin_unlock(&event->lock);
+	return 0;
+}
+
+/**
+ * kgsl_cmdbatch_add_sync_timestamp() - Add a new sync point for a cmdbatch
+ * @device: KGSL device
+ * @cmdbatch: KGSL cmdbatch to add the sync point to
+ * @priv: Private structure passed by the user
+ *
+ * Add a new sync point timestamp event to the cmdbatch.
+ */
+static int kgsl_cmdbatch_add_sync_timestamp(struct kgsl_device *device,
+		struct kgsl_cmdbatch *cmdbatch, void *priv)
+{
+	struct kgsl_cmd_syncpoint_timestamp *sync = priv;
+	struct kgsl_context *context = kgsl_context_get(cmdbatch->device,
+		sync->context_id);
+	struct kgsl_cmdbatch_sync_event *event;
+	int ret = -EINVAL;
+
+	if (context == NULL)
+		return -EINVAL;
+
+	/*
+	 * We allow somebody to create a sync point on their own context.
+	 * This has the effect of delaying a command from submitting until the
+	 * dependent command has cleared.  That said we obviously can't let them
+	 * create a sync point on a future timestamp.
+	 */
+
+	if (context == cmdbatch->context) {
+		unsigned int queued = kgsl_readtimestamp(device, context,
+			KGSL_TIMESTAMP_QUEUED);
+
+		if (timestamp_cmp(sync->timestamp, queued) > 0) {
+			KGSL_DRV_ERR(device,
+			"Cannot create syncpoint for future timestamp %d (current %d)\n",
+				sync->timestamp, queued);
+			goto done;
+		}
+	}
+
+	event = kzalloc(sizeof(*event), GFP_KERNEL);
+	if (event == NULL) {
+		ret = -ENOMEM;
 		goto done;
 	}
 
-	if (param->flags & KGSL_CONTEXT_SUBMIT_IB_LIST) {
-		if (!param->numibs) {
-			result = -EINVAL;
-			goto done;
+	kref_get(&cmdbatch->refcount);
+
+	event->type = KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP;
+	event->cmdbatch = cmdbatch;
+	event->context = context;
+	event->timestamp = sync->timestamp;
+
+	spin_lock(&cmdbatch->lock);
+	list_add(&event->node, &cmdbatch->synclist);
+	spin_unlock(&cmdbatch->lock);
+
+	mutex_lock(&device->mutex);
+	ret = kgsl_add_event(device, context->id, sync->timestamp,
+		kgsl_cmdbatch_sync_func, event, NULL);
+	mutex_unlock(&device->mutex);
+
+	if (ret) {
+		spin_lock(&cmdbatch->lock);
+		list_del(&event->node);
+		spin_unlock(&cmdbatch->lock);
+
+		kgsl_cmdbatch_put(cmdbatch);
+		kfree(event);
+	}
+
+done:
+	if (ret)
+		kgsl_context_put(context);
+
+	return ret;
+}
+
+/**
+ * kgsl_cmdbatch_add_sync() - Add a sync point to a command batch
+ * @device: Pointer to the KGSL device struct for the GPU
+ * @cmdbatch: Pointer to the cmdbatch
+ * @sync: Pointer to the user-specified struct defining the syncpoint
+ *
+ * Create a new sync point in the cmdbatch based on the user specified
+ * parameters
+ */
+static int kgsl_cmdbatch_add_sync(struct kgsl_device *device,
+	struct kgsl_cmdbatch *cmdbatch,
+	struct kgsl_cmd_syncpoint *sync)
+{
+	void *priv;
+	int ret, psize;
+	int (*func)(struct kgsl_device *device, struct kgsl_cmdbatch *cmdbatch,
+			void *priv);
+
+	switch (sync->type) {
+	case KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP:
+		psize = sizeof(struct kgsl_cmd_syncpoint_timestamp);
+		func = kgsl_cmdbatch_add_sync_timestamp;
+		break;
+	case KGSL_CMD_SYNCPOINT_TYPE_FENCE:
+		psize = sizeof(struct kgsl_cmd_syncpoint_fence);
+		func = kgsl_cmdbatch_add_sync_fence;
+		break;
+	default:
+		KGSL_DRV_ERR(device, "Invalid sync type 0x%x\n", sync->type);
+		return -EINVAL;
+	}
+
+	if (sync->size != psize) {
+		KGSL_DRV_ERR(device, "Invalid sync size %d\n", sync->size);
+		return -EINVAL;
+	}
+
+	priv = kzalloc(sync->size, GFP_KERNEL);
+	if (priv == NULL)
+		return -ENOMEM;
+
+	if (copy_from_user(priv, sync->priv, sync->size)) {
+		kfree(priv);
+		return -EFAULT;
+	}
+
+	ret = func(device, cmdbatch, priv);
+	kfree(priv);
+
+	return ret;
+}
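
A fence sync point is built the same way. A sketch, assuming the fence
payload carries only the sync file descriptor (kgsl_cmdbatch_add_sync_fence
itself is outside this hunk), with sync_fd as a placeholder:

	struct kgsl_cmd_syncpoint_fence fence_sync = {
		.fd = sync_fd,			/* Android sync fence fd */
	};
	struct kgsl_cmd_syncpoint sync = {
		.type = KGSL_CMD_SYNCPOINT_TYPE_FENCE,
		.priv = &fence_sync,
		.size = sizeof(fence_sync),	/* must match psize for the type */
	};
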
+
+/**
+ * kgsl_cmdbatch_create() - Create a new cmdbatch structure
+ * @device: Pointer to a KGSL device struct
+ * @context: Pointer to a KGSL context struct
+ * @flags: Flags from the user to control the cmdbatch behavior
+ * @numibs: Number of indirect buffers to make room for in the cmdbatch
+ *
+ * Allocate a new cmdbatch structure and add enough room to store the list of
+ * indirect buffers
+ */
+static struct kgsl_cmdbatch *kgsl_cmdbatch_create(struct kgsl_device *device,
+		struct kgsl_context *context, unsigned int flags,
+		unsigned int numibs)
+{
+	struct kgsl_cmdbatch *cmdbatch = kzalloc(sizeof(*cmdbatch), GFP_KERNEL);
+	if (cmdbatch == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	/*
+	 * Increase the reference count on the context so it doesn't disappear
+	 * during the lifetime of this command batch
+	 */
+
+	if (!_kgsl_context_get(context)) {
+		kfree(cmdbatch);
+		return ERR_PTR(-EINVAL);
+	}
+
+	if (!(flags & KGSL_CONTEXT_SYNC)) {
+		cmdbatch->ibdesc = kzalloc(sizeof(*cmdbatch->ibdesc) * numibs,
+			GFP_KERNEL);
+		if (cmdbatch->ibdesc == NULL) {
+			kgsl_context_put(context);
+			kfree(cmdbatch);
+			return ERR_PTR(-ENOMEM);
+		}
+	}
+
+	kref_init(&cmdbatch->refcount);
+	INIT_LIST_HEAD(&cmdbatch->synclist);
+	spin_lock_init(&cmdbatch->lock);
+
+	cmdbatch->device = device;
+	cmdbatch->ibcount = (flags & KGSL_CONTEXT_SYNC) ? 0 : numibs;
+	cmdbatch->context = context;
+	cmdbatch->flags = flags & ~KGSL_CONTEXT_SUBMIT_IB_LIST;
+
+	return cmdbatch;
+}
+
+/**
+ * _kgsl_cmdbatch_verify() - Perform a quick sanity check on a command batch
+ * @dev_priv: Pointer to the device private struct that owns the command batch
+ * @cmdbatch: Pointer to the command batch to verify
+ *
+ * Do a quick sanity test on the list of indirect buffers in a command batch,
+ * verifying that each IB has a non-zero size and a GPU address that falls
+ * within the process pagetable.
+ */
+static bool _kgsl_cmdbatch_verify(struct kgsl_device_private *dev_priv,
+	struct kgsl_cmdbatch *cmdbatch)
+{
+	int i;
+	struct kgsl_process_private *private = dev_priv->process_priv;
+
+	for (i = 0; i < cmdbatch->ibcount; i++) {
+		if (cmdbatch->ibdesc[i].sizedwords == 0) {
+			KGSL_DRV_ERR(dev_priv->device,
+				"invalid size ctx %d ib(%d) %X/%X\n",
+				cmdbatch->context->id, i,
+				cmdbatch->ibdesc[i].gpuaddr,
+				cmdbatch->ibdesc[i].sizedwords);
+
+			return false;
 		}
 
+		if (!kgsl_mmu_gpuaddr_in_range(private->pagetable,
+			cmdbatch->ibdesc[i].gpuaddr)) {
+			KGSL_DRV_ERR(dev_priv->device,
+				"Invalid address ctx %d ib(%d) %X/%X\n",
+				cmdbatch->context->id, i,
+				cmdbatch->ibdesc[i].gpuaddr,
+				cmdbatch->ibdesc[i].sizedwords);
+
+			return false;
+		}
+	}
+
+	return true;
+}
+
+/**
+ * _kgsl_cmdbatch_create_legacy() - Create a cmdbatch from a legacy ioctl struct
+ * @device: Pointer to the KGSL device struct for the GPU
+ * @context: Pointer to the KGSL context that issued the command batch
+ * @param: Pointer to the kgsl_ringbuffer_issueibcmds struct that the user sent
+ *
+ * Create a command batch from the legacy issueibcmds format.
+ */
+static struct kgsl_cmdbatch *_kgsl_cmdbatch_create_legacy(
+		struct kgsl_device *device,
+		struct kgsl_context *context,
+		struct kgsl_ringbuffer_issueibcmds *param)
+{
+	struct kgsl_cmdbatch *cmdbatch =
+		kgsl_cmdbatch_create(device, context, param->flags, 1);
+
+	if (IS_ERR(cmdbatch))
+		return cmdbatch;
+
+	cmdbatch->ibdesc[0].gpuaddr = param->ibdesc_addr;
+	cmdbatch->ibdesc[0].sizedwords = param->numibs;
+	cmdbatch->ibcount = 1;
+	cmdbatch->flags = param->flags;
+
+	return cmdbatch;
+}
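
Note the field reuse in the legacy path: ibdesc_addr and numibs are
reinterpreted per the assignments above. A hypothetical caller, with
ctx_id/ib_gpuaddr/ib_sizedwords as placeholders:

	struct kgsl_ringbuffer_issueibcmds param = {
		.drawctxt_id = ctx_id,
		.ibdesc_addr = ib_gpuaddr,	/* GPU address of the single IB */
		.numibs = ib_sizedwords,	/* dword count, not an IB count */
	};
	ioctl(fd, IOCTL_KGSL_RINGBUFFER_ISSUEIBCMDS, &param);
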
+
+/**
+ * _kgsl_cmdbatch_create() - Create a cmdbatch from an ioctl struct
+ * @device: Pointer to the KGSL device struct for the GPU
+ * @context: Pointer to the KGSL context that issued the command batch
+ * @flags: Flags passed in from the user command
+ * @cmdlist: Pointer to the list of commands from the user
+ * @numcmds: Number of commands in the list
+ * @synclist: Pointer to the list of syncpoints from the user
+ * @numsyncs: Number of syncpoints in the list
+ *
+ * Create a command batch from the standard issueibcmds format sent by the user.
+ */
+static struct kgsl_cmdbatch *_kgsl_cmdbatch_create(struct kgsl_device *device,
+		struct kgsl_context *context,
+		unsigned int flags,
+		unsigned int cmdlist, unsigned int numcmds,
+		unsigned int synclist, unsigned int numsyncs)
+{
+	struct kgsl_cmdbatch *cmdbatch =
+		kgsl_cmdbatch_create(device, context, flags, numcmds);
+	int ret = 0;
+
+	if (IS_ERR(cmdbatch))
+		return cmdbatch;
+
+	if (!(flags & KGSL_CONTEXT_SYNC)) {
+		if (copy_from_user(cmdbatch->ibdesc, (void __user *) cmdlist,
+			sizeof(struct kgsl_ibdesc) * numcmds)) {
+			ret = -EFAULT;
+			goto done;
+		}
+	}
+
+	if (synclist && numsyncs) {
+		struct kgsl_cmd_syncpoint sync;
+		void __user *uptr = (void __user *) synclist;
+		int i;
+
+		for (i = 0; i < numsyncs; i++) {
+			memset(&sync, 0, sizeof(sync));
+
+			if (copy_from_user(&sync, uptr, sizeof(sync))) {
+				ret = -EFAULT;
+				break;
+			}
+
+			ret = kgsl_cmdbatch_add_sync(device, cmdbatch, &sync);
+
+			if (ret)
+				break;
+
+			uptr += sizeof(sync);
+		}
+	}
+
+done:
+	if (ret) {
+		kgsl_cmdbatch_destroy(cmdbatch);
+		return ERR_PTR(ret);
+	}
+
+	return cmdbatch;
+}
+
+static long kgsl_ioctl_rb_issueibcmds(struct kgsl_device_private *dev_priv,
+				      unsigned int cmd, void *data)
+{
+	struct kgsl_ringbuffer_issueibcmds *param = data;
+	struct kgsl_device *device = dev_priv->device;
+	struct kgsl_context *context;
+	struct kgsl_cmdbatch *cmdbatch;
+	long result = -EINVAL;
+
+	/* The legacy functions don't support synchronization commands */
+	if (param->flags & KGSL_CONTEXT_SYNC)
+		return -EINVAL;
+
+	/* Get the context */
+	context = kgsl_context_get_owner(dev_priv, param->drawctxt_id);
+	if (context == NULL)
+		goto done;
+
+	if (param->flags & KGSL_CONTEXT_SUBMIT_IB_LIST) {
 		/*
-		 * Put a reasonable upper limit on the number of IBs that can be
-		 * submitted
+		 * Do a quick sanity check on the number of IBs in the
+		 * submission
 		 */
 
-		if (param->numibs > 10000) {
-			result = -EINVAL;
+		if (param->numibs == 0 || param->numibs > KGSL_MAX_NUMIBS)
 			goto done;
-		}
 
-		ibdesc = kzalloc(sizeof(struct kgsl_ibdesc) * param->numibs,
-					GFP_KERNEL);
-		if (!ibdesc) {
-			KGSL_MEM_ERR(dev_priv->device,
-				"kzalloc(%d) failed\n",
-				sizeof(struct kgsl_ibdesc) * param->numibs);
-			result = -ENOMEM;
-			goto done;
-		}
+		cmdbatch = _kgsl_cmdbatch_create(device, context, param->flags,
+				param->ibdesc_addr, param->numibs, 0, 0);
+	} else {
+		cmdbatch = _kgsl_cmdbatch_create_legacy(device, context, param);
+	}
 
-		if (copy_from_user(ibdesc, (void *)param->ibdesc_addr,
-				sizeof(struct kgsl_ibdesc) * param->numibs)) {
-			result = -EFAULT;
-			KGSL_DRV_ERR(dev_priv->device,
-				"copy_from_user failed\n");
-			goto free_ibdesc;
-		}
-	} else {
-		KGSL_DRV_INFO(dev_priv->device,
-			"Using single IB submission mode for ib submission\n");
-		/* If user space driver is still using the old mode of
-		 * submitting single ib then we need to support that as well */
-		ibdesc = kzalloc(sizeof(struct kgsl_ibdesc), GFP_KERNEL);
-		if (!ibdesc) {
-			KGSL_MEM_ERR(dev_priv->device,
-				"kzalloc(%d) failed\n",
-				sizeof(struct kgsl_ibdesc));
-			result = -ENOMEM;
-			goto done;
-		}
-		ibdesc[0].gpuaddr = param->ibdesc_addr;
-		ibdesc[0].sizedwords = param->numibs;
-		param->numibs = 1;
+	if (IS_ERR(cmdbatch)) {
+		result = PTR_ERR(cmdbatch);
+		goto done;
 	}
 
-	for (i = 0; i < param->numibs; i++) {
-		struct kgsl_pagetable *pt = dev_priv->process_priv->pagetable;
+	/* Run basic sanity checking on the command */
+	if (!_kgsl_cmdbatch_verify(dev_priv, cmdbatch))
+		goto free_cmdbatch;
 
-		if (!kgsl_mmu_gpuaddr_in_range(pt, ibdesc[i].gpuaddr)) {
-			result = -ERANGE;
-			KGSL_DRV_ERR(dev_priv->device,
-				     "invalid ib base GPU virtual addr %x\n",
-				     ibdesc[i].gpuaddr);
-			goto free_ibdesc;
-		}
+	result = dev_priv->device->ftbl->issueibcmds(dev_priv, context,
+		cmdbatch, &param->timestamp);
+
+free_cmdbatch:
+	/*
+	 * -EPROTO is a "success" error - it just tells the user that the
+	 * context had previously faulted
+	 */
+	if (result && result != -EPROTO)
+		kgsl_cmdbatch_destroy(cmdbatch);
+
+done:
+	kgsl_context_put(context);
+	return result;
+}
+
+static long kgsl_ioctl_submit_commands(struct kgsl_device_private *dev_priv,
+				      unsigned int cmd, void *data)
+{
+	struct kgsl_submit_commands *param = data;
+	struct kgsl_device *device = dev_priv->device;
+	struct kgsl_context *context;
+	struct kgsl_cmdbatch *cmdbatch;
+
+	long result = -EINVAL;
+
+	/* The number of IBs is completely ignored for sync commands */
+	if (!(param->flags & KGSL_CONTEXT_SYNC)) {
+		if (param->numcmds == 0 || param->numcmds > KGSL_MAX_NUMIBS)
+			return -EINVAL;
+	} else if (param->numcmds != 0) {
+		KGSL_DEV_ERR_ONCE(device,
+			"Commands specified with the SYNC flag.  They will be ignored\n");
 	}
 
-	result = dev_priv->device->ftbl->issueibcmds(dev_priv,
-					     context,
-					     ibdesc,
-					     param->numibs,
-					     &param->timestamp,
-					     param->flags);
+	context = kgsl_context_get_owner(dev_priv, param->context_id);
+	if (context == NULL)
+		return -EINVAL;
 
-free_ibdesc:
-	kfree(ibdesc);
+	cmdbatch = _kgsl_cmdbatch_create(device, context, param->flags,
+		(unsigned int) param->cmdlist, param->numcmds,
+		(unsigned int) param->synclist, param->numsyncs);
+
+	if (IS_ERR(cmdbatch)) {
+		result = PTR_ERR(cmdbatch);
+		goto done;
+	}
+
+	/* Run basic sanity checking on the command */
+	if (!_kgsl_cmdbatch_verify(dev_priv, cmdbatch))
+		goto free_cmdbatch;
+
+	result = dev_priv->device->ftbl->issueibcmds(dev_priv, context,
+		cmdbatch, &param->timestamp);
+
+free_cmdbatch:
+	/*
+	 * -EPROTO is a "success" error - it just tells the user that the
+	 * context had previously faulted
+	 */
+	if (result && result != -EPROTO)
+		kgsl_cmdbatch_destroy(cmdbatch);
+
 done:
 	kgsl_context_put(context);
 	return result;
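
For comparison, a sketch of driving the new entry point from userspace. The
member names are inferred from the param-> accesses in the handler above (the
authoritative layout is in msm_kgsl.h), and kgsl_fd/ctx_id/ib_gpuaddr/
ib_dwords are placeholders:

	struct kgsl_ibdesc ib = {
		.gpuaddr = ib_gpuaddr,
		.sizedwords = ib_dwords,
	};
	struct kgsl_submit_commands param = {
		.context_id = ctx_id,
		.flags = 0,			/* not KGSL_CONTEXT_SYNC */
		.cmdlist = &ib,
		.numcmds = 1,
		.synclist = NULL,
		.numsyncs = 0,
	};
	if (ioctl(kgsl_fd, IOCTL_KGSL_SUBMIT_COMMANDS, &param) == 0)
		retire_ts = param.timestamp;	/* ts assigned to the batch */
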
@@ -1666,14 +2189,11 @@
 {
 	struct kgsl_drawctxt_destroy *param = data;
 	struct kgsl_context *context;
-	long result = -EINVAL;
+	long result;
 
 	context = kgsl_context_get_owner(dev_priv, param->drawctxt_id);
 
-	if (context) {
-		kgsl_context_detach(context);
-		result = 0;
-	}
+	result = kgsl_context_detach(context);
 
 	kgsl_context_put(context);
 	return result;
@@ -2763,7 +3283,6 @@
 		{ .cmd = (_cmd), .func = (_func), .flags = (_flags) }
 
 #define KGSL_IOCTL_LOCK		BIT(0)
-#define KGSL_IOCTL_WAKE		BIT(1)
 
 static const struct {
 	unsigned int cmd;
@@ -2775,13 +3294,14 @@
 			KGSL_IOCTL_LOCK),
 	KGSL_IOCTL_FUNC(IOCTL_KGSL_DEVICE_WAITTIMESTAMP,
 			kgsl_ioctl_device_waittimestamp,
-			KGSL_IOCTL_LOCK | KGSL_IOCTL_WAKE),
+			KGSL_IOCTL_LOCK),
 	KGSL_IOCTL_FUNC(IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID,
 			kgsl_ioctl_device_waittimestamp_ctxtid,
-			KGSL_IOCTL_LOCK | KGSL_IOCTL_WAKE),
+			KGSL_IOCTL_LOCK),
 	KGSL_IOCTL_FUNC(IOCTL_KGSL_RINGBUFFER_ISSUEIBCMDS,
-			kgsl_ioctl_rb_issueibcmds,
-			KGSL_IOCTL_LOCK | KGSL_IOCTL_WAKE),
+			kgsl_ioctl_rb_issueibcmds, 0),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_SUBMIT_COMMANDS,
+			kgsl_ioctl_submit_commands, 0),
 	KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_READTIMESTAMP,
 			kgsl_ioctl_cmdstream_readtimestamp,
 			KGSL_IOCTL_LOCK),
@@ -2799,7 +3319,7 @@
 			KGSL_IOCTL_LOCK),
 	KGSL_IOCTL_FUNC(IOCTL_KGSL_DRAWCTXT_DESTROY,
 			kgsl_ioctl_drawctxt_destroy,
-			KGSL_IOCTL_LOCK | KGSL_IOCTL_WAKE),
+			KGSL_IOCTL_LOCK),
 	KGSL_IOCTL_FUNC(IOCTL_KGSL_MAP_USER_MEM,
 			kgsl_ioctl_map_user_mem, 0),
 	KGSL_IOCTL_FUNC(IOCTL_KGSL_SHAREDMEM_FROM_PMEM,
@@ -2837,7 +3357,7 @@
 	struct kgsl_device_private *dev_priv = filep->private_data;
 	unsigned int nr;
 	kgsl_ioctl_func_t func;
-	int lock, ret, use_hw;
+	int lock, ret;
 	char ustack[64];
 	void *uptr = NULL;
 
@@ -2895,7 +3415,6 @@
 
 		func = kgsl_ioctl_funcs[nr].func;
 		lock = kgsl_ioctl_funcs[nr].flags & KGSL_IOCTL_LOCK;
-		use_hw = kgsl_ioctl_funcs[nr].flags & KGSL_IOCTL_WAKE;
 	} else {
 		func = dev_priv->device->ftbl->ioctl;
 		if (!func) {
@@ -2905,26 +3424,15 @@
 			goto done;
 		}
 		lock = 1;
-		use_hw = 1;
 	}
 
-	if (lock) {
+	if (lock)
 		mutex_lock(&dev_priv->device->mutex);
-		if (use_hw) {
-			ret = kgsl_active_count_get(dev_priv->device);
-			if (ret < 0)
-				goto unlock;
-		}
-	}
 
 	ret = func(dev_priv, cmd, uptr);
 
-unlock:
-	if (lock) {
-		if (use_hw)
-			kgsl_active_count_put(dev_priv->device);
+	if (lock)
 		mutex_unlock(&dev_priv->device->mutex);
-	}
 
 	/*
 	 * Still copy back on failure, but assume function took
@@ -2980,7 +3488,8 @@
 static void kgsl_gpumem_vm_open(struct vm_area_struct *vma)
 {
 	struct kgsl_mem_entry *entry = vma->vm_private_data;
-	kgsl_mem_entry_get(entry);
+	if (!kgsl_mem_entry_get(entry))
+		vma->vm_private_data = NULL;
 }
 
 static int
@@ -2988,6 +3497,8 @@
 {
 	struct kgsl_mem_entry *entry = vma->vm_private_data;
 
+	if (!entry)
+		return VM_FAULT_SIGBUS;
 	if (!entry->memdesc.ops || !entry->memdesc.ops->vmfault)
 		return VM_FAULT_SIGBUS;
 
@@ -2999,6 +3510,9 @@
 {
 	struct kgsl_mem_entry *entry  = vma->vm_private_data;
 
+	if (!entry)
+		return;
+
 	entry->memdesc.useraddr = 0;
 	kgsl_mem_entry_put(entry);
 }
@@ -3076,14 +3590,11 @@
 	if (ret)
 		return ret;
 
-	if (!kgsl_memdesc_use_cpu_map(&entry->memdesc) || (flags & MAP_FIXED)) {
-		/*
-		 * If we're not going to use the same mapping on the gpu,
-		 * any address is fine.
-		 * For MAP_FIXED, hopefully the caller knows what they're doing,
-		 * but we may fail in mmap() if there is already something
-		 * at the virtual address chosen.
-		 */
+	/*
+	 * If we're not going to use the CPU map feature, get an ordinary mapping
+	 * with nothing more to be done.
+	 */
+	if (!kgsl_memdesc_use_cpu_map(&entry->memdesc)) {
 		ret = get_unmapped_area(NULL, addr, len, pgoff, flags);
 		goto put;
 	}
@@ -3175,7 +3686,7 @@
 		} else {
 			ret = -EBUSY;
 		}
-	} while (mmap_range_valid(addr, len));
+	} while (!(flags & MAP_FIXED) && mmap_range_valid(addr, len));
 
 	if (IS_ERR_VALUE(ret))
 		KGSL_MEM_ERR(device,
@@ -3463,7 +3974,6 @@
 
 
 	setup_timer(&device->idle_timer, kgsl_timer, (unsigned long) device);
-	setup_timer(&device->hang_timer, hang_timer, (unsigned long) device);
 	status = kgsl_create_device_workqueue(device);
 	if (status)
 		goto error_pwrctrl_close;
@@ -3523,7 +4033,6 @@
 
 		if (device->state == KGSL_STATE_ACTIVE)
 			kgsl_idle(device);
-
 	}
 
 	if (device->pm_dump_enable) {
@@ -3537,13 +4046,12 @@
 			pwr->power_flags, pwr->active_pwrlevel);
 
 		KGSL_LOG_DUMP(device, "POWER: INTERVAL TIMEOUT = %08X ",
-			pwr->interval_timeout);
+				pwr->interval_timeout);
 
 	}
 
 	/* Disable the idle timer so we don't get interrupted */
 	del_timer_sync(&device->idle_timer);
-	del_timer_sync(&device->hang_timer);
 
 	/* Force on the clocks */
 	kgsl_pwrctrl_wake(device);
diff --git a/drivers/gpu/msm/kgsl.h b/drivers/gpu/msm/kgsl.h
index 0525eab..ee7a485 100644
--- a/drivers/gpu/msm/kgsl.h
+++ b/drivers/gpu/msm/kgsl.h
@@ -78,6 +78,8 @@
 
 #define KGSL_MEMFREE_HIST_SIZE	((int)(PAGE_SIZE * 2))
 
+#define KGSL_MAX_NUMIBS 100000
+
 struct kgsl_memfree_hist_elem {
 	unsigned int pid;
 	unsigned int gpuaddr;
@@ -141,6 +143,7 @@
 
 struct kgsl_pagetable;
 struct kgsl_memdesc;
+struct kgsl_cmdbatch;
 
 struct kgsl_memdesc_ops {
 	int (*vmflags)(struct kgsl_memdesc *);
@@ -205,7 +208,6 @@
 #define MMU_CONFIG 1
 #endif
 
-void kgsl_hang_check(struct work_struct *work);
 void kgsl_mem_entry_destroy(struct kref *kref);
 int kgsl_postmortem_dump(struct kgsl_device *device, int manual);
 
@@ -237,7 +239,7 @@
 		unsigned int value);
 
 void kgsl_trace_issueibcmds(struct kgsl_device *device, int id,
-		struct kgsl_ibdesc *ibdesc, int numibs,
+		struct kgsl_cmdbatch *cmdbatch,
 		unsigned int timestamp, unsigned int flags,
 		int result, unsigned int type);
 
@@ -318,10 +320,10 @@
 	return ((a > b) && (a - b <= KGSL_TIMESTAMP_WINDOW)) ? 1 : -1;
 }
 
-static inline void
+static inline int
 kgsl_mem_entry_get(struct kgsl_mem_entry *entry)
 {
-	kref_get(&entry->refcount);
+	return kref_get_unless_zero(&entry->refcount);
 }
 
 static inline void
diff --git a/drivers/gpu/msm/kgsl_debugfs.c b/drivers/gpu/msm/kgsl_debugfs.c
index e623515..318b10d 100644
--- a/drivers/gpu/msm/kgsl_debugfs.c
+++ b/drivers/gpu/msm/kgsl_debugfs.c
@@ -122,7 +122,6 @@
 KGSL_DEBUGFS_LOG(ctxt_log);
 KGSL_DEBUGFS_LOG(mem_log);
 KGSL_DEBUGFS_LOG(pwr_log);
-KGSL_DEBUGFS_LOG(ft_log);
 
 static int memfree_hist_print(struct seq_file *s, void *unused)
 {
@@ -191,8 +190,6 @@
 				&pwr_log_fops);
 	debugfs_create_file("memfree_history", 0444, device->d_debugfs, device,
 				&memfree_hist_fops);
-	debugfs_create_file("log_level_ft", 0644, device->d_debugfs, device,
-				&ft_log_fops);
 
 	/* Create postmortem dump control files */
 
diff --git a/drivers/gpu/msm/kgsl_device.h b/drivers/gpu/msm/kgsl_device.h
index 0b5fe52..f87c64c 100644
--- a/drivers/gpu/msm/kgsl_device.h
+++ b/drivers/gpu/msm/kgsl_device.h
@@ -13,6 +13,7 @@
 #ifndef __KGSL_DEVICE_H
 #define __KGSL_DEVICE_H
 
+#include <linux/slab.h>
 #include <linux/idr.h>
 #include <linux/pm_qos.h>
 #include <linux/sched.h>
@@ -47,7 +48,6 @@
 #define KGSL_STATE_SLEEP	0x00000008
 #define KGSL_STATE_SUSPEND	0x00000010
 #define KGSL_STATE_HUNG		0x00000020
-#define KGSL_STATE_DUMP_AND_FT	0x00000040
 #define KGSL_STATE_SLUMBER	0x00000080
 
 #define KGSL_GRAPHICS_MEMORY_LOW_WATERMARK  0x1000000
@@ -76,6 +76,7 @@
 struct kgsl_context;
 struct kgsl_power_stats;
 struct kgsl_event;
+struct kgsl_cmdbatch;
 
 struct kgsl_functable {
 	/* Mandatory functions - these functions must be implemented
@@ -87,7 +88,7 @@
 	void (*regwrite) (struct kgsl_device *device,
 		unsigned int offsetwords, unsigned int value);
 	int (*idle) (struct kgsl_device *device);
-	unsigned int (*isidle) (struct kgsl_device *device);
+	bool (*isidle) (struct kgsl_device *device);
 	int (*suspend_context) (struct kgsl_device *device);
 	int (*init) (struct kgsl_device *device);
 	int (*start) (struct kgsl_device *device);
@@ -101,9 +102,8 @@
 	unsigned int (*readtimestamp) (struct kgsl_device *device,
 		struct kgsl_context *context, enum kgsl_timestamp_type type);
 	int (*issueibcmds) (struct kgsl_device_private *dev_priv,
-		struct kgsl_context *context, struct kgsl_ibdesc *ibdesc,
-		unsigned int sizedwords, uint32_t *timestamp,
-		unsigned int flags);
+		struct kgsl_context *context, struct kgsl_cmdbatch *cmdbatch,
+		uint32_t *timestamps);
 	int (*setup_pt)(struct kgsl_device *device,
 		struct kgsl_pagetable *pagetable);
 	void (*cleanup_pt)(struct kgsl_device *device,
@@ -115,14 +115,15 @@
 	void * (*snapshot)(struct kgsl_device *device, void *snapshot,
 		int *remain, int hang);
 	irqreturn_t (*irq_handler)(struct kgsl_device *device);
+	int (*drain)(struct kgsl_device *device);
 	/* Optional functions - these functions are not mandatory.  The
 	   driver will check that the function pointer is not NULL before
 	   calling the hook */
-	void (*setstate) (struct kgsl_device *device, unsigned int context_id,
+	int (*setstate) (struct kgsl_device *device, unsigned int context_id,
 			uint32_t flags);
 	struct kgsl_context *(*drawctxt_create) (struct kgsl_device_private *,
 						uint32_t *flags);
-	void (*drawctxt_detach) (struct kgsl_context *context);
+	int (*drawctxt_detach) (struct kgsl_context *context);
 	void (*drawctxt_destroy) (struct kgsl_context *context);
 	long (*ioctl) (struct kgsl_device_private *dev_priv,
 		unsigned int cmd, void *data);
@@ -132,6 +133,9 @@
 	int (*postmortem_dump) (struct kgsl_device *device, int manual);
 	int (*next_event)(struct kgsl_device *device,
 		struct kgsl_event *event);
+	void (*drawctxt_sched)(struct kgsl_device *device,
+		struct kgsl_context *context);
+	void (*resume)(struct kgsl_device *device);
 };
 
 /* MH register values */
@@ -155,6 +159,56 @@
 	unsigned int created;
 };
 
+/**
+ * struct kgsl_cmdbatch - KGSL command descriptor
+ * @device: KGSL GPU device that the command was created for
+ * @context: KGSL context that created the command
+ * @lock: Spinlock protecting the sync point list
+ * @timestamp: Timestamp assigned to the command
+ * @flags: Flags set by the user for the command batch
+ * @priv: Internal flags
+ * @fault_policy: Internal policy describing how to handle this command in case
+ * of a fault
+ * @fault_recovery: recovery actions actually tried for this batch
+ * @ibcount: Number of IBs in the command list
+ * @ibdesc: Pointer to the list of IBs
+ * @expires: Point in time when the cmdbatch is considered to be hung
+ * @invalid: non-zero if the dispatcher determines the command and the owning
+ * context should be invalidated
+ * @refcount: kref structure to maintain the reference count
+ * @synclist: List of context/timestamp tuples to wait for before issuing
+ *
+ * This structure defines an atomic batch of command buffers issued from
+ * userspace.
+ */
+struct kgsl_cmdbatch {
+	struct kgsl_device *device;
+	struct kgsl_context *context;
+	spinlock_t lock;
+	uint32_t timestamp;
+	uint32_t flags;
+	unsigned long priv;
+	unsigned long fault_policy;
+	unsigned long fault_recovery;
+	uint32_t ibcount;
+	struct kgsl_ibdesc *ibdesc;
+	unsigned long expires;
+	int invalid;
+	struct kref refcount;
+	struct list_head synclist;
+};
+
+/**
+ * enum kgsl_cmdbatch_priv - Internal cmdbatch flags
+ * @CMDBATCH_FLAG_SKIP: Skip the entire command batch
+ * @CMDBATCH_FLAG_FORCE_PREAMBLE: Force the preamble on for the cmdbatch
+ * @CMDBATCH_FLAG_WFI: Force wait-for-idle for the submission
+ */
+enum kgsl_cmdbatch_priv {
+	CMDBATCH_FLAG_SKIP = 0,
+	CMDBATCH_FLAG_FORCE_PREAMBLE,
+	CMDBATCH_FLAG_WFI,
+};
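
Because @priv is an unsigned long and the enum values are bit numbers, the
dispatcher can mark batches with the standard bitops. A sketch of plausible
dispatcher-side use (the dispatcher itself is not part of this hunk, and
skip_ibs is a hypothetical helper):

	/* fault policy decided this batch should be dropped on replay */
	set_bit(CMDBATCH_FLAG_SKIP, &cmdbatch->priv);

	/* ... later, at submit time ... */
	if (test_bit(CMDBATCH_FLAG_SKIP, &cmdbatch->priv))
		skip_ibs(cmdbatch);
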
 
 struct kgsl_device {
 	struct device *dev;
@@ -188,11 +242,10 @@
 	struct kgsl_mh mh;
 	struct kgsl_mmu mmu;
 	struct completion hwaccess_gate;
+	struct completion cmdbatch_gate;
 	const struct kgsl_functable *ftbl;
 	struct work_struct idle_check_ws;
-	struct work_struct hang_check_ws;
 	struct timer_list idle_timer;
-	struct timer_list hang_timer;
 	struct kgsl_pwrctrl pwrctrl;
 	int open_count;
 
@@ -206,7 +259,6 @@
 	wait_queue_head_t active_cnt_wq;
 	struct workqueue_struct *work_queue;
 	struct device *parentdev;
-	struct completion ft_gate;
 	struct dentry *d_debugfs;
 	struct idr context_idr;
 	rwlock_t context_lock;
@@ -233,13 +285,13 @@
 	int drv_log;
 	int mem_log;
 	int pwr_log;
-	int ft_log;
 	int pm_dump_enable;
 	struct kgsl_pwrscale pwrscale;
 	struct kobject pwrscale_kobj;
 	struct work_struct ts_expired_ws;
 	struct list_head events;
 	struct list_head events_pending_list;
+	unsigned int events_last_timestamp;
 	s64 on_time;
 
 	/* Postmortem Control switches */
@@ -254,11 +306,9 @@
 
 #define KGSL_DEVICE_COMMON_INIT(_dev) \
 	.hwaccess_gate = COMPLETION_INITIALIZER((_dev).hwaccess_gate),\
-	.ft_gate = COMPLETION_INITIALIZER((_dev).ft_gate),\
+	.cmdbatch_gate = COMPLETION_INITIALIZER((_dev).cmdbatch_gate),\
 	.idle_check_ws = __WORK_INITIALIZER((_dev).idle_check_ws,\
 			kgsl_idle_check),\
-	.hang_check_ws = __WORK_INITIALIZER((_dev).hang_check_ws,\
-			kgsl_hang_check),\
 	.ts_expired_ws  = __WORK_INITIALIZER((_dev).ts_expired_ws,\
 			kgsl_process_events),\
 	.context_idr = IDR_INIT((_dev).context_idr),\
@@ -278,6 +328,7 @@
 /* the context has caused a pagefault */
 #define KGSL_CONTEXT_PAGEFAULT 1
 
+struct kgsl_process_private;
 /**
  * struct kgsl_context - Master structure for a KGSL context object
  * @refcount: kref object for reference counting the context
@@ -294,9 +345,9 @@
  * @events_list: list node for the list of all contexts that have pending events
  * @pid: process that owns this context.
  * @tid: task that created this context.
- * @pagefault: flag set if this context caused a pagefault.
  * @pagefault_ts: global timestamp of the pagefault, if KGSL_CONTEXT_PAGEFAULT
  * is set.
+ * @flags: flags from userspace controlling the behavior of this context
  */
 struct kgsl_context {
 	struct kref refcount;
@@ -304,15 +355,16 @@
 	pid_t pid;
 	pid_t tid;
 	struct kgsl_device_private *dev_priv;
+	struct kgsl_process_private *proc_priv;
 	unsigned long priv;
 	struct kgsl_device *device;
-	struct kgsl_pagetable *pagetable;
 	unsigned int reset_status;
 	bool wait_on_invalid_ts;
 	struct sync_timeline *timeline;
 	struct list_head events;
 	struct list_head events_list;
 	unsigned int pagefault_ts;
+	unsigned int flags;
 };
 
 /**
@@ -376,6 +428,9 @@
 int kgsl_add_event(struct kgsl_device *device, u32 id, u32 ts,
 	kgsl_event_func func, void *priv, void *owner);
 
+void kgsl_cancel_event(struct kgsl_device *device, struct kgsl_context *context,
+		unsigned int timestamp, kgsl_event_func func, void *priv);
+
 static inline void kgsl_process_add_stats(struct kgsl_process_private *priv,
 	unsigned int type, size_t size)
 {
@@ -463,8 +518,6 @@
 	return 0;
 }
 
-
-
 int kgsl_check_timestamp(struct kgsl_device *device,
 		struct kgsl_context *context, unsigned int timestamp);
 
@@ -491,6 +544,7 @@
 
 int kgsl_context_init(struct kgsl_device_private *, struct kgsl_context
 		*context);
+int kgsl_context_detach(struct kgsl_context *context);
 
 /**
  * kgsl_context_put() - Release context reference count
@@ -535,6 +589,7 @@
 static inline struct kgsl_context *kgsl_context_get(struct kgsl_device *device,
 		uint32_t id)
 {
+	int result = 0;
 	struct kgsl_context *context = NULL;
 
 	read_lock(&device->context_lock);
@@ -545,10 +600,12 @@
 	if (kgsl_context_detached(context))
 		context = NULL;
 	else
-		kref_get(&context->refcount);
+		result = kref_get_unless_zero(&context->refcount);
 
 	read_unlock(&device->context_lock);
 
+	if (!result)
+		return NULL;
 	return context;
 }
 
@@ -560,10 +617,22 @@
 * lightweight way to just increase the refcount on a known context rather than
 * walking through kgsl_context_get and searching the iterator
 */
-static inline void _kgsl_context_get(struct kgsl_context *context)
+static inline int _kgsl_context_get(struct kgsl_context *context)
 {
-	if (context)
-		kref_get(&context->refcount);
+	int ret = 0;
+
+	if (context) {
+		ret = kref_get_unless_zero(&context->refcount);
+		/*
+		 * We shouldn't realistically fail kref_get_unless_zero unless
+		 * we did something really dumb so make the failure both public
+		 * and painful
+		 */
+
+		WARN_ON(!ret);
+	}
+
+	return ret;
 }
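
The unless-zero variants matter because these lookups race with the final
kgsl_context_put(): a plain kref_get() on a refcount that already hit zero
would resurrect a dying object. A sketch of the safe lookup pattern this
patch applies throughout (the idr lookup line itself sits above this hunk;
idr_find is assumed):

	read_lock(&device->context_lock);
	context = idr_find(&device->context_idr, id);
	/* only take a reference if the object is still live */
	if (context && !kref_get_unless_zero(&context->refcount))
		context = NULL;
	read_unlock(&device->context_lock);
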
 
 /**
@@ -620,4 +689,40 @@
 {
 	kgsl_signal_event(device, context, timestamp, KGSL_EVENT_CANCELLED);
 }
+
+void kgsl_cmdbatch_destroy(struct kgsl_cmdbatch *cmdbatch);
+
+void kgsl_cmdbatch_destroy_object(struct kref *kref);
+
+/**
+ * kgsl_cmdbatch_put() - Decrement the refcount for a command batch object
+ * @cmdbatch: Pointer to the command batch object
+ */
+static inline void kgsl_cmdbatch_put(struct kgsl_cmdbatch *cmdbatch)
+{
+	if (cmdbatch)
+		kref_put(&cmdbatch->refcount, kgsl_cmdbatch_destroy_object);
+}
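
Pieced together from this patch, the intended reference lifetime looks
roughly like this (kgsl_cmdbatch_destroy() is declared above and presumably
drops the creation reference after canceling any outstanding sync waits):

	cmdbatch = kgsl_cmdbatch_create(device, context, flags, numibs);
						/* refcount == 1 */
	kref_get(&cmdbatch->refcount);		/* +1 per queued sync event */
	/* ... */
	kgsl_cmdbatch_put(cmdbatch);		/* sync event retired/canceled */
	kgsl_cmdbatch_destroy(cmdbatch);	/* drops the creation reference */
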
+
+/**
+ * kgsl_cmdbatch_sync_pending() - return true if the cmdbatch is waiting
+ * @cmdbatch: Pointer to the command batch object to check
+ *
+ * Return non-zero if the specified command batch is still waiting for sync
+ * point dependencies to be satisfied
+ */
+static inline int kgsl_cmdbatch_sync_pending(struct kgsl_cmdbatch *cmdbatch)
+{
+	int ret;
+
+	if (cmdbatch == NULL)
+		return 0;
+
+	spin_lock(&cmdbatch->lock);
+	ret = list_empty(&cmdbatch->synclist) ? 0 : 1;
+	spin_unlock(&cmdbatch->lock);
+
+	return ret;
+}
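
A hypothetical dispatcher-side use: a batch whose synclist is non-empty is
left queued rather than submitted, and the sync callbacks added in this patch
kick the dispatcher once the last dependency clears:

	if (kgsl_cmdbatch_sync_pending(cmdbatch))
		return -EAGAIN;	/* leave it queued; retry when the sync fires */
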
+
 #endif  /* __KGSL_DEVICE_H */
diff --git a/drivers/gpu/msm/kgsl_events.c b/drivers/gpu/msm/kgsl_events.c
index 9e8f6d0..e21fd88 100644
--- a/drivers/gpu/msm/kgsl_events.c
+++ b/drivers/gpu/msm/kgsl_events.c
@@ -48,7 +48,8 @@
 {
 	int id = event->context ? event->context->id : KGSL_MEMSTORE_GLOBAL;
 
-	trace_kgsl_fire_event(id, timestamp, type, jiffies - event->created);
+	trace_kgsl_fire_event(id, timestamp, type, jiffies - event->created,
+		event->func);
 
 	if (event->func)
 		event->func(device, event->priv, id, timestamp, type);
@@ -56,8 +57,6 @@
 	list_del(&event->list);
 	kgsl_context_put(event->context);
 	kfree(event);
-
-	kgsl_active_count_put(device);
 }
 
 static void _retire_events(struct kgsl_device *device,
@@ -210,9 +209,8 @@
 int kgsl_add_event(struct kgsl_device *device, u32 id, u32 ts,
 	kgsl_event_func func, void *priv, void *owner)
 {
-	int ret;
 	struct kgsl_event *event;
-	unsigned int cur_ts;
+	unsigned int queued, cur_ts;
 	struct kgsl_context *context = NULL;
 
 	BUG_ON(!mutex_is_locked(&device->mutex));
@@ -225,6 +223,14 @@
 		if (context == NULL)
 			return -EINVAL;
 	}
+
+	queued = kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_QUEUED);
+
+	if (timestamp_cmp(ts, queued) > 0) {
+		kgsl_context_put(context);
+		return -EINVAL;
+	}
+
 	cur_ts = kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED);
 
 	/*
@@ -235,7 +241,7 @@
 	 */
 
 	if (timestamp_cmp(cur_ts, ts) >= 0) {
-		trace_kgsl_fire_event(id, cur_ts, ts, 0);
+		trace_kgsl_fire_event(id, cur_ts, ts, 0, func);
 
 		func(device, priv, id, ts, KGSL_EVENT_TIMESTAMP_RETIRED);
 		kgsl_context_put(context);
@@ -248,17 +254,6 @@
 		return -ENOMEM;
 	}
 
-	/*
-	 * Increase the active count on the device to avoid going into power
-	 * saving modes while events are pending
-	 */
-	ret = kgsl_active_count_get(device);
-	if (ret < 0) {
-		kgsl_context_put(context);
-		kfree(event);
-		return ret;
-	}
-
 	event->context = context;
 	event->timestamp = ts;
 	event->priv = priv;
@@ -266,7 +261,7 @@
 	event->owner = owner;
 	event->created = jiffies;
 
-	trace_kgsl_register_event(id, ts);
+	trace_kgsl_register_event(id, ts, func);
 
 	/* Add the event to either the owning context or the global list */
 
@@ -333,7 +328,11 @@
 		void *priv)
 {
 	struct kgsl_event *event;
-	struct list_head *head = _get_list_head(device, context);
+	struct list_head *head;
+
+	BUG_ON(!mutex_is_locked(&device->mutex));
+
+	head = _get_list_head(device, context);
 
 	event = _find_event(device, head, timestamp, func, priv);
 
@@ -402,7 +401,6 @@
 
 	mutex_lock(&device->mutex);
 
-	/* Process expired global events */
 	timestamp = kgsl_readtimestamp(device, NULL, KGSL_TIMESTAMP_RETIRED);
 	_retire_events(device, &device->events, timestamp);
 	_mark_next_event(device, &device->events);
@@ -415,15 +413,17 @@
 		 * Increment the refcount to make sure that the list_del_init
 		 * is called with a valid context's list
 		 */
-		_kgsl_context_get(context);
-		/*
-		 * If kgsl_timestamp_expired_context returns 0 then it no longer
-		 * has any pending events and can be removed from the list
-		 */
+		if (_kgsl_context_get(context)) {
+			/*
+			 * If kgsl_timestamp_expired_context returns 0 then it
+			 * no longer has any pending events and can be removed
+			 * from the list
+			 */
 
-		if (kgsl_process_context_events(device, context) == 0)
-			list_del_init(&context->events_list);
-		kgsl_context_put(context);
+			if (kgsl_process_context_events(device, context) == 0)
+				list_del_init(&context->events_list);
+			kgsl_context_put(context);
+		}
 	}
 
 	mutex_unlock(&device->mutex);
diff --git a/drivers/gpu/msm/kgsl_gpummu.c b/drivers/gpu/msm/kgsl_gpummu.c
index 68052b1..2634e4f 100644
--- a/drivers/gpu/msm/kgsl_gpummu.c
+++ b/drivers/gpu/msm/kgsl_gpummu.c
@@ -482,15 +482,17 @@
 	return NULL;
 }
 
-static void kgsl_gpummu_default_setstate(struct kgsl_mmu *mmu,
+static int kgsl_gpummu_default_setstate(struct kgsl_mmu *mmu,
 					uint32_t flags)
 {
 	struct kgsl_gpummu_pt *gpummu_pt;
 	if (!kgsl_mmu_enabled())
-		return;
+		return 0;
 
 	if (flags & KGSL_MMUFLAGS_PTUPDATE) {
-		kgsl_idle(mmu->device);
+		int ret = kgsl_idle(mmu->device);
+		if (ret)
+			return ret;
 		gpummu_pt = mmu->hwpagetable->priv;
 		kgsl_regwrite(mmu->device, MH_MMU_PT_BASE,
 			gpummu_pt->base.gpuaddr);
@@ -500,12 +502,16 @@
 		/* Invalidate all and tc */
 		kgsl_regwrite(mmu->device, MH_MMU_INVALIDATE,  0x00000003);
 	}
+
+	return 0;
 }
 
-static void kgsl_gpummu_setstate(struct kgsl_mmu *mmu,
+static int kgsl_gpummu_setstate(struct kgsl_mmu *mmu,
 				struct kgsl_pagetable *pagetable,
 				unsigned int context_id)
 {
+	int ret = 0;
+
 	if (mmu->flags & KGSL_FLAGS_STARTED) {
 		/* page table not current, then setup mmu to use new
 		 *  specified page table
@@ -518,10 +524,13 @@
 			kgsl_mmu_pt_get_flags(pagetable, mmu->device->id);
 
 			/* call device specific set page table */
-			kgsl_setstate(mmu, context_id, KGSL_MMUFLAGS_TLBFLUSH |
+			ret = kgsl_setstate(mmu, context_id,
+				KGSL_MMUFLAGS_TLBFLUSH |
 				KGSL_MMUFLAGS_PTUPDATE);
 		}
 	}
+
+	return ret;
 }
 
 static int kgsl_gpummu_init(struct kgsl_mmu *mmu)
@@ -563,6 +572,7 @@
 
 	struct kgsl_device *device = mmu->device;
 	struct kgsl_gpummu_pt *gpummu_pt;
+	int ret;
 
 	if (mmu->flags & KGSL_FLAGS_STARTED)
 		return 0;
@@ -574,9 +584,6 @@
 	/* setup MMU and sub-client behavior */
 	kgsl_regwrite(device, MH_MMU_CONFIG, mmu->config);
 
-	/* idle device */
-	kgsl_idle(device);
-
 	/* enable axi interrupts */
 	kgsl_regwrite(device, MH_INTERRUPT_MASK,
 			GSL_MMU_INT_MASK | MH_INTERRUPT_MASK__MMU_PAGE_FAULT);
@@ -607,10 +614,12 @@
 	kgsl_regwrite(mmu->device, MH_MMU_VA_RANGE,
 		      (KGSL_PAGETABLE_BASE |
 		      (CONFIG_MSM_KGSL_PAGE_TABLE_SIZE >> 16)));
-	kgsl_setstate(mmu, KGSL_MEMSTORE_GLOBAL, KGSL_MMUFLAGS_TLBFLUSH);
-	mmu->flags |= KGSL_FLAGS_STARTED;
 
-	return 0;
+	ret = kgsl_setstate(mmu, KGSL_MEMSTORE_GLOBAL, KGSL_MMUFLAGS_TLBFLUSH);
+	if (!ret)
+		mmu->flags |= KGSL_FLAGS_STARTED;
+
+	return ret;
 }
 
 static int
diff --git a/drivers/gpu/msm/kgsl_iommu.c b/drivers/gpu/msm/kgsl_iommu.c
index a06ebbf..246295b 100644
--- a/drivers/gpu/msm/kgsl_iommu.c
+++ b/drivers/gpu/msm/kgsl_iommu.c
@@ -334,6 +334,15 @@
 	ret = get_iommu_unit(dev, &mmu, &iommu_unit);
 	if (ret)
 		goto done;
+
+	device = mmu->device;
+	adreno_dev = ADRENO_DEVICE(device);
+	if (atomic_read(&mmu->fault)) {
+		if (adreno_dev->ft_pf_policy & KGSL_FT_PAGEFAULT_GPUHALT_ENABLE)
+			ret = -EBUSY;
+		goto done;
+	}
+
 	iommu_dev = get_iommu_device(iommu_unit, dev);
 	if (!iommu_dev) {
 		KGSL_CORE_ERR("Invalid IOMMU device %p\n", dev);
@@ -341,8 +350,38 @@
 		goto done;
 	}
 	iommu = mmu->priv;
-	device = mmu->device;
-	adreno_dev = ADRENO_DEVICE(device);
+
+	/*
+	 * Set the fault bits and related state before any printks so that if
+	 * the fault handler runs it will know it is dealing with a pagefault
+	 */
+	kgsl_sharedmem_readl(&device->memstore, &curr_context_id,
+		KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context));
+
+	context = kgsl_context_get(device, curr_context_id);
+
+	if (context != NULL) {
+		kgsl_sharedmem_readl(&device->memstore, &curr_global_ts,
+			KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
+			eoptimestamp));
+
+		/* save pagefault timestamp for GFT */
+		set_bit(KGSL_CONTEXT_PAGEFAULT, &context->priv);
+		context->pagefault_ts = curr_global_ts;
+
+		kgsl_context_put(context);
+		context = NULL;
+	}
+
+	atomic_set(&mmu->fault, 1);
+	iommu_dev->fault = 1;
+
+	if (adreno_dev->ft_pf_policy & KGSL_FT_PAGEFAULT_GPUHALT_ENABLE) {
+		adreno_set_gpu_fault(adreno_dev, ADRENO_IOMMU_PAGE_FAULT);
+		/* turn off GPU IRQ so we don't get faults from it too */
+		kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF);
+		adreno_dispatcher_schedule(device);
+	}
 
 	ptbase = KGSL_IOMMU_GET_CTX_REG(iommu, iommu_unit,
 					iommu_dev->ctx_id, TTBR0);
@@ -394,27 +433,6 @@
 
 	}
 
-	mmu->fault = 1;
-	iommu_dev->fault = 1;
-
-	kgsl_sharedmem_readl(&device->memstore, &curr_context_id,
-		KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context));
-
-	context = kgsl_context_get(device, curr_context_id);
-
-	if (context != NULL) {
-		kgsl_sharedmem_readl(&device->memstore, &curr_global_ts,
-			KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
-			eoptimestamp));
-
-		/* save pagefault timestamp for GFT */
-		set_bit(KGSL_CONTEXT_PAGEFAULT, &context->priv);
-		context->pagefault_ts = curr_global_ts;
-
-		kgsl_context_put(context);
-		context = NULL;
-	}
-
 	trace_kgsl_mmu_pagefault(iommu_dev->kgsldev, addr,
 			kgsl_mmu_get_ptname_from_ptbase(mmu, ptbase),
 			write ? "write" : "read");
@@ -1157,7 +1175,8 @@
 		if (ret)
 			goto err;
 
-		iommu_unit->iommu_halt_enable = data.iommu_halt_enable;
+		if (!msm_soc_version_supports_iommu_v0())
+			iommu_unit->iommu_halt_enable = 1;
 		iommu_unit->ahb_base = data.physstart - mmu->device->reg_phys;
 	}
 	iommu->unit_count = pdata_dev->iommu_count;
@@ -1215,10 +1234,12 @@
 	return 0;
 }
 
-static void kgsl_iommu_setstate(struct kgsl_mmu *mmu,
+static int kgsl_iommu_setstate(struct kgsl_mmu *mmu,
 				struct kgsl_pagetable *pagetable,
 				unsigned int context_id)
 {
+	int ret = 0;
+
 	if (mmu->flags & KGSL_FLAGS_STARTED) {
 		/* page table not current, then setup mmu to use new
 		 *  specified page table
@@ -1229,10 +1250,12 @@
 			flags |= kgsl_mmu_pt_get_flags(mmu->hwpagetable,
 							mmu->device->id) |
 							KGSL_MMUFLAGS_TLBFLUSH;
-			kgsl_setstate(mmu, context_id,
+			ret = kgsl_setstate(mmu, context_id,
 				KGSL_MMUFLAGS_PTUPDATE | flags);
 		}
 	}
+
+	return ret;
 }
 
 /*
@@ -1338,6 +1361,7 @@
 	int status = 0;
 	struct kgsl_iommu *iommu;
 
+	atomic_set(&mmu->fault, 0);
 	iommu = kzalloc(sizeof(struct kgsl_iommu), GFP_KERNEL);
 	if (!iommu) {
 		KGSL_CORE_ERR("kzalloc(%d) failed\n",
@@ -1791,7 +1815,7 @@
 	struct kgsl_iommu *iommu = mmu->priv;
 	int i, j;
 
-	if (mmu->fault) {
+	if (atomic_read(&mmu->fault)) {
 		for (i = 0; i < iommu->unit_count; i++) {
 			struct kgsl_iommu_unit *iommu_unit =
 						&iommu->iommu_units[i];
@@ -1803,12 +1827,16 @@
 						iommu_unit,
 						iommu_unit->dev[j].ctx_id,
 						RESUME, 1);
+					KGSL_IOMMU_SET_CTX_REG(iommu,
+						iommu_unit,
+						iommu_unit->dev[j].ctx_id,
+						FSR, 0);
 					_iommu_unlock(iommu);
 					iommu_unit->dev[j].fault = 0;
 				}
 			}
 		}
-		mmu->fault = 0;
+		atomic_set(&mmu->fault, 0);
 	}
 }
 
@@ -1902,31 +1930,40 @@
  * cpu
- * Return - void
+ * Return - 0 on success else error code
  */
-static void kgsl_iommu_default_setstate(struct kgsl_mmu *mmu,
+static int kgsl_iommu_default_setstate(struct kgsl_mmu *mmu,
 					uint32_t flags)
 {
 	struct kgsl_iommu *iommu = mmu->priv;
 	int temp;
 	int i;
+	int ret = 0;
 	phys_addr_t pt_base = kgsl_iommu_get_pt_base_addr(mmu,
 						mmu->hwpagetable);
 	phys_addr_t pt_val;
 
-	if (kgsl_iommu_enable_clk(mmu, KGSL_IOMMU_CONTEXT_USER)) {
+	ret = kgsl_iommu_enable_clk(mmu, KGSL_IOMMU_CONTEXT_USER);
+
+	if (ret) {
 		KGSL_DRV_ERR(mmu->device, "Failed to enable iommu clocks\n");
-		return;
+		return ret;
 	}
 
 	/* For v0 SMMU GPU needs to be idle for tlb invalidate as well */
-	if (msm_soc_version_supports_iommu_v0())
-		kgsl_idle(mmu->device);
+	if (msm_soc_version_supports_iommu_v0()) {
+		ret = kgsl_idle(mmu->device);
+		if (ret)
+			return ret;
+	}
 
 	/* Acquire GPU-CPU sync Lock here */
 	_iommu_lock(iommu);
 
 	if (flags & KGSL_MMUFLAGS_PTUPDATE) {
-		if (!msm_soc_version_supports_iommu_v0())
-			kgsl_idle(mmu->device);
+		if (!msm_soc_version_supports_iommu_v0()) {
+			ret = kgsl_idle(mmu->device);
+			if (ret)
+				goto unlock;
+		}
 		for (i = 0; i < iommu->unit_count; i++) {
 			/* get the lsb value which should not change when
 			 * changing ttbr0 */
@@ -1987,12 +2024,13 @@
 			}
 		}
 	}
-
+unlock:
 	/* Release GPU-CPU sync Lock here */
 	_iommu_unlock(iommu);
 
 	/* Disable smmu clock */
 	kgsl_iommu_disable_clk_on_ts(mmu, 0, false);
+	return ret;
 }
 
 /*
@@ -2049,6 +2087,7 @@
 	.mmu_pagefault_resume = kgsl_iommu_pagefault_resume,
 	.mmu_get_current_ptbase = kgsl_iommu_get_current_ptbase,
 	.mmu_enable_clk = kgsl_iommu_enable_clk,
+	.mmu_disable_clk = kgsl_iommu_disable_clk,
 	.mmu_disable_clk_on_ts = kgsl_iommu_disable_clk_on_ts,
 	.mmu_get_default_ttbr0 = kgsl_iommu_get_default_ttbr0,
 	.mmu_get_reg_gpuaddr = kgsl_iommu_get_reg_gpuaddr,
diff --git a/drivers/gpu/msm/kgsl_log.h b/drivers/gpu/msm/kgsl_log.h
index 94649bd..f90627e 100644
--- a/drivers/gpu/msm/kgsl_log.h
+++ b/drivers/gpu/msm/kgsl_log.h
@@ -101,15 +101,6 @@
 #define KGSL_PWR_CRIT(_dev, fmt, args...) \
 KGSL_LOG_CRIT(_dev->dev, _dev->pwr_log, fmt, ##args)
 
-#define KGSL_FT_INFO(_dev, fmt, args...) \
-KGSL_LOG_INFO(_dev->dev, _dev->ft_log, fmt, ##args)
-#define KGSL_FT_WARN(_dev, fmt, args...) \
-KGSL_LOG_WARN(_dev->dev, _dev->ft_log, fmt, ##args)
-#define KGSL_FT_ERR(_dev, fmt, args...) \
-KGSL_LOG_ERR(_dev->dev, _dev->ft_log, fmt, ##args)
-#define KGSL_FT_CRIT(_dev, fmt, args...) \
-KGSL_LOG_CRIT(_dev->dev, _dev->ft_log, fmt, ##args)
-
 /* Core error messages - these are for core KGSL functions that have
    no device associated with them (such as memory) */
 
diff --git a/drivers/gpu/msm/kgsl_mmu.c b/drivers/gpu/msm/kgsl_mmu.c
index 952019f..6b04aad 100644
--- a/drivers/gpu/msm/kgsl_mmu.c
+++ b/drivers/gpu/msm/kgsl_mmu.c
@@ -123,10 +123,12 @@
 
 	spin_lock_irqsave(&kgsl_driver.ptlock, flags);
 	list_for_each_entry(pt, &kgsl_driver.pagetable_list, list) {
-		if (pt->name == name) {
-			ret = pt;
-			kref_get(&ret->refcount);
-			break;
+		if (kref_get_unless_zero(&pt->refcount)) {
+			if (pt->name == name) {
+				ret = pt;
+				break;
+			}
+			kref_put(&pt->refcount, kgsl_destroy_pagetable);
 		}
 	}
 
@@ -323,9 +325,13 @@
 		return KGSL_MMU_GLOBAL_PT;
 	spin_lock(&kgsl_driver.ptlock);
 	list_for_each_entry(pt, &kgsl_driver.pagetable_list, list) {
-		if (mmu->mmu_ops->mmu_pt_equal(mmu, pt, pt_base)) {
-			ptid = (int) pt->name;
-			break;
+		if (kref_get_unless_zero(&pt->refcount)) {
+			if (mmu->mmu_ops->mmu_pt_equal(mmu, pt, pt_base)) {
+				ptid = (int) pt->name;
+				kref_put(&pt->refcount, kgsl_destroy_pagetable);
+				break;
+			}
+			kref_put(&pt->refcount, kgsl_destroy_pagetable);
 		}
 	}
 	spin_unlock(&kgsl_driver.ptlock);
@@ -345,16 +351,23 @@
 		return 0;
 	spin_lock(&kgsl_driver.ptlock);
 	list_for_each_entry(pt, &kgsl_driver.pagetable_list, list) {
-		if (mmu->mmu_ops->mmu_pt_equal(mmu, pt, pt_base)) {
-			if ((addr & ~(PAGE_SIZE-1)) == pt->fault_addr) {
-				ret = 1;
-				break;
-			} else {
-				pt->fault_addr = (addr & ~(PAGE_SIZE-1));
-				ret = 0;
-				break;
+		if (kref_get_unless_zero(&pt->refcount)) {
+			if (mmu->mmu_ops->mmu_pt_equal(mmu, pt, pt_base)) {
+				if ((addr & ~(PAGE_SIZE-1)) == pt->fault_addr) {
+					ret = 1;
+					kref_put(&pt->refcount,
+						kgsl_destroy_pagetable);
+					break;
+				} else {
+					pt->fault_addr =
+						(addr & ~(PAGE_SIZE-1));
+					ret = 0;
+					kref_put(&pt->refcount,
+						kgsl_destroy_pagetable);
+					break;
+				}
 			}
-
+			kref_put(&pt->refcount, kgsl_destroy_pagetable);
 		}
 	}
 	spin_unlock(&kgsl_driver.ptlock);
@@ -566,7 +579,7 @@
 }
 EXPORT_SYMBOL(kgsl_mmu_putpagetable);
 
-void kgsl_setstate(struct kgsl_mmu *mmu, unsigned int context_id,
+int kgsl_setstate(struct kgsl_mmu *mmu, unsigned int context_id,
 			uint32_t flags)
 {
 	struct kgsl_device *device = mmu->device;
@@ -574,14 +587,16 @@
 
 	if (!(flags & (KGSL_MMUFLAGS_TLBFLUSH | KGSL_MMUFLAGS_PTUPDATE))
 		&& !adreno_is_a2xx(adreno_dev))
-		return;
+		return 0;
 
 	if (KGSL_MMU_TYPE_NONE == kgsl_mmu_type)
-		return;
+		return 0;
 	else if (device->ftbl->setstate)
-		device->ftbl->setstate(device, context_id, flags);
+		return device->ftbl->setstate(device, context_id, flags);
 	else if (mmu->mmu_ops->mmu_device_setstate)
-		mmu->mmu_ops->mmu_device_setstate(mmu, flags);
+		return mmu->mmu_ops->mmu_device_setstate(mmu, flags);
+
+	return 0;
 }
 EXPORT_SYMBOL(kgsl_setstate);
 
@@ -590,7 +605,6 @@
 	struct kgsl_mh *mh = &device->mh;
 	/* force mmu off to for now*/
 	kgsl_regwrite(device, MH_MMU_CONFIG, 0);
-	kgsl_idle(device);
 
 	/* define physical memory range accessible by the core */
 	kgsl_regwrite(device, MH_MMU_MPU_BASE, mh->mpu_base);
diff --git a/drivers/gpu/msm/kgsl_mmu.h b/drivers/gpu/msm/kgsl_mmu.h
index faba81e..8bc9962 100644
--- a/drivers/gpu/msm/kgsl_mmu.h
+++ b/drivers/gpu/msm/kgsl_mmu.h
@@ -133,10 +133,10 @@
 	int (*mmu_close) (struct kgsl_mmu *mmu);
 	int (*mmu_start) (struct kgsl_mmu *mmu);
 	void (*mmu_stop) (struct kgsl_mmu *mmu);
-	void (*mmu_setstate) (struct kgsl_mmu *mmu,
+	int (*mmu_setstate) (struct kgsl_mmu *mmu,
 		struct kgsl_pagetable *pagetable,
 		unsigned int context_id);
-	void (*mmu_device_setstate) (struct kgsl_mmu *mmu,
+	int (*mmu_device_setstate) (struct kgsl_mmu *mmu,
 					uint32_t flags);
 	void (*mmu_pagefault) (struct kgsl_mmu *mmu);
 	phys_addr_t (*mmu_get_current_ptbase)
@@ -147,6 +147,8 @@
 		(struct kgsl_mmu *mmu, uint32_t ts, bool ts_valid);
 	int (*mmu_enable_clk)
 		(struct kgsl_mmu *mmu, int ctx_id);
+	void (*mmu_disable_clk)
+		(struct kgsl_mmu *mmu);
 	phys_addr_t (*mmu_get_default_ttbr0)(struct kgsl_mmu *mmu,
 				unsigned int unit_id,
 				enum kgsl_iommu_context_id ctx_id);
@@ -200,7 +202,7 @@
 	struct kgsl_pagetable  *hwpagetable;
 	const struct kgsl_mmu_ops *mmu_ops;
 	void *priv;
-	int fault;
+	atomic_t fault;
 	unsigned long pt_base;
 	unsigned long pt_size;
 	bool pt_per_process;
@@ -231,7 +233,7 @@
 int kgsl_mmu_put_gpuaddr(struct kgsl_pagetable *pagetable,
 		 struct kgsl_memdesc *memdesc);
 unsigned int kgsl_virtaddr_to_physaddr(void *virtaddr);
-void kgsl_setstate(struct kgsl_mmu *mmu, unsigned int context_id,
+int kgsl_setstate(struct kgsl_mmu *mmu, unsigned int context_id,
 			uint32_t flags);
 int kgsl_mmu_get_ptname_from_ptbase(struct kgsl_mmu *mmu,
 					phys_addr_t pt_base);
@@ -260,19 +262,23 @@
 		return 0;
 }
 
-static inline void kgsl_mmu_setstate(struct kgsl_mmu *mmu,
+static inline int kgsl_mmu_setstate(struct kgsl_mmu *mmu,
 			struct kgsl_pagetable *pagetable,
 			unsigned int context_id)
 {
 	if (mmu->mmu_ops && mmu->mmu_ops->mmu_setstate)
-		mmu->mmu_ops->mmu_setstate(mmu, pagetable, context_id);
+		return mmu->mmu_ops->mmu_setstate(mmu, pagetable, context_id);
+
+	return 0;
 }
 
-static inline void kgsl_mmu_device_setstate(struct kgsl_mmu *mmu,
+static inline int kgsl_mmu_device_setstate(struct kgsl_mmu *mmu,
 						uint32_t flags)
 {
 	if (mmu->mmu_ops && mmu->mmu_ops->mmu_device_setstate)
-		mmu->mmu_ops->mmu_device_setstate(mmu, flags);
+		return mmu->mmu_ops->mmu_device_setstate(mmu, flags);
+
+	return 0;
 }
 
 static inline void kgsl_mmu_stop(struct kgsl_mmu *mmu)
@@ -320,6 +326,12 @@
 		return 0;
 }
 
+static inline void kgsl_mmu_disable_clk(struct kgsl_mmu *mmu)
+{
+	if (mmu->mmu_ops && mmu->mmu_ops->mmu_disable_clk)
+		mmu->mmu_ops->mmu_disable_clk(mmu);
+}
+
 static inline void kgsl_mmu_disable_clk_on_ts(struct kgsl_mmu *mmu,
 						unsigned int ts, bool ts_valid)
 {
diff --git a/drivers/gpu/msm/kgsl_pwrctrl.c b/drivers/gpu/msm/kgsl_pwrctrl.c
index 6faf0bf..8ed29fb 100644
--- a/drivers/gpu/msm/kgsl_pwrctrl.c
+++ b/drivers/gpu/msm/kgsl_pwrctrl.c
@@ -570,7 +570,11 @@
 {
 	int ret;
 	struct kgsl_device *device = kgsl_device_from_dev(dev);
-	struct kgsl_clk_stats *clkstats = &device->pwrctrl.clk_stats;
+	struct kgsl_clk_stats *clkstats;
+
+	if (device == NULL)
+		return 0;
+	clkstats = &device->pwrctrl.clk_stats;
 	ret = snprintf(buf, PAGE_SIZE, "%7d %7d\n",
 			clkstats->on_time_old, clkstats->elapsed_old);
 	if (!test_bit(KGSL_PWRFLAGS_AXI_ON, &device->pwrctrl.power_flags)) {
@@ -586,10 +590,13 @@
 {
 	int ret;
 	struct kgsl_device *device = kgsl_device_from_dev(dev);
-	struct kgsl_clk_stats *clkstats = &device->pwrctrl.clk_stats;
+	struct kgsl_clk_stats *clkstats;
 	int i = 0;
 	char *ptr = buf;
 
+	if (device == NULL)
+		return 0;
+	clkstats = &device->pwrctrl.clk_stats;
 	ret = snprintf(buf, PAGE_SIZE, "%7d %7d ", clkstats->on_time_old,
 					clkstats->elapsed_old);
 	for (i = 0, ptr += ret; i < device->pwrctrl.num_pwrlevels;
@@ -630,6 +637,8 @@
 					char *buf)
 {
 	struct kgsl_device *device = kgsl_device_from_dev(dev);
+	if (device == NULL)
+		return 0;
 	return snprintf(buf, PAGE_SIZE, "%d\n", device->reset_counter);
 }
 
@@ -659,8 +668,10 @@
 					char *buf, int flag)
 {
 	struct kgsl_device *device = kgsl_device_from_dev(dev);
-	int i = test_bit(flag, &device->pwrctrl.ctrl_flags);
-	return snprintf(buf, PAGE_SIZE, "%d\n", i);
+	if (device == NULL)
+		return 0;
+	return snprintf(buf, PAGE_SIZE, "%d\n",
+		test_bit(flag, &device->pwrctrl.ctrl_flags));
 }
 
 static int __force_on_store(struct device *dev,
@@ -1215,9 +1226,6 @@
 		} else {
 			device->pwrctrl.irq_last = 0;
 		}
-	} else if (device->state & (KGSL_STATE_HUNG |
-					KGSL_STATE_DUMP_AND_FT)) {
-		kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE);
 	}
 
 	mutex_unlock(&device->mutex);
@@ -1273,7 +1281,6 @@
 			kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE);
 			return -EBUSY;
 		}
-		del_timer_sync(&device->hang_timer);
 		kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF);
 		kgsl_pwrctrl_clk(device, KGSL_PWRFLAGS_OFF, KGSL_STATE_NAP);
 		kgsl_pwrctrl_set_state(device, KGSL_STATE_NAP);
@@ -1343,7 +1350,6 @@
 	case KGSL_STATE_NAP:
 	case KGSL_STATE_SLEEP:
 		del_timer_sync(&device->idle_timer);
-		del_timer_sync(&device->hang_timer);
 		/* make sure power is on to stop the device*/
 		kgsl_pwrctrl_enable(device);
 		device->ftbl->suspend_context(device);
@@ -1434,9 +1440,8 @@
 		/* Enable state before turning on irq */
 		kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE);
 		kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_ON);
-
-		mod_timer(&device->hang_timer,
-			(jiffies + msecs_to_jiffies(KGSL_TIMEOUT_PART)));
+		mod_timer(&device->idle_timer, jiffies +
+				device->pwrctrl.interval_timeout);
 		pm_qos_update_request(&device->pwrctrl.pm_qos_req_dma,
 				device->pwrctrl.pm_qos_latency);
 	case KGSL_STATE_ACTIVE:
@@ -1506,10 +1511,6 @@
 		return "SLEEP";
 	case KGSL_STATE_SUSPEND:
 		return "SUSPEND";
-	case KGSL_STATE_HUNG:
-		return "HUNG";
-	case KGSL_STATE_DUMP_AND_FT:
-		return "DNR";
 	case KGSL_STATE_SLUMBER:
 		return "SLUMBER";
 	default:
@@ -1539,16 +1540,9 @@
 
 	if ((atomic_read(&device->active_cnt) == 0) &&
 		(device->state != KGSL_STATE_ACTIVE)) {
-
-		if (device->state != KGSL_STATE_DUMP_AND_FT) {
-			mutex_unlock(&device->mutex);
-			wait_for_completion(&device->hwaccess_gate);
-			wait_for_completion(&device->ft_gate);
-			mutex_lock(&device->mutex);
-		}
-
-		/* Stop the idle timer */
-		del_timer_sync(&device->idle_timer);
+		mutex_unlock(&device->mutex);
+		wait_for_completion(&device->hwaccess_gate);
+		mutex_lock(&device->mutex);
 
 		ret = kgsl_pwrctrl_wake(device);
 	}
@@ -1632,17 +1626,19 @@
  */
 int kgsl_active_count_wait(struct kgsl_device *device, int count)
 {
-	int ret = 0;
+	int result = 0;
 
 	BUG_ON(!mutex_is_locked(&device->mutex));
 
 	if (atomic_read(&device->active_cnt) > count) {
+		int ret;
 		mutex_unlock(&device->mutex);
 		ret = wait_event_timeout(device->active_cnt_wq,
 			_check_active_count(device, count), HZ);
 		mutex_lock(&device->mutex);
+		result = ret == 0 ? -ETIMEDOUT : 0;
 	}
 
-	return ret == 0 ? -ETIMEDOUT : 0;
+	return result;
 }
 EXPORT_SYMBOL(kgsl_active_count_wait);
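
Callers now get an explicit timeout result instead of a bare wait. A sketch
of draining all users before a power transition (KGSL_PWR_ERR assumed from
the existing log macro family in kgsl_log.h):

	/* returns -ETIMEDOUT if the count is still above 0 after HZ jiffies */
	if (kgsl_active_count_wait(device, 0))
		KGSL_PWR_ERR(device, "active count failed to drain\n");
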
diff --git a/drivers/gpu/msm/kgsl_pwrctrl.h b/drivers/gpu/msm/kgsl_pwrctrl.h
index 71a0fdd..9f18160 100644
--- a/drivers/gpu/msm/kgsl_pwrctrl.h
+++ b/drivers/gpu/msm/kgsl_pwrctrl.h
@@ -120,8 +120,8 @@
 void kgsl_pwrctrl_set_state(struct kgsl_device *device, unsigned int state);
 void kgsl_pwrctrl_request_state(struct kgsl_device *device, unsigned int state);
 
-int kgsl_active_count_get(struct kgsl_device *device);
-int kgsl_active_count_get_light(struct kgsl_device *device);
+int __must_check kgsl_active_count_get(struct kgsl_device *device);
+int __must_check kgsl_active_count_get_light(struct kgsl_device *device);
 void kgsl_active_count_put(struct kgsl_device *device);
 int kgsl_active_count_wait(struct kgsl_device *device, int count);
 
diff --git a/drivers/gpu/msm/kgsl_sharedmem.c b/drivers/gpu/msm/kgsl_sharedmem.c
index 5950451..d031d5e 100644
--- a/drivers/gpu/msm/kgsl_sharedmem.c
+++ b/drivers/gpu/msm/kgsl_sharedmem.c
@@ -744,6 +744,8 @@
 	BUG_ON(size == 0);
 
 	size = ALIGN(size, PAGE_SIZE * 2);
+	if (size == 0)
+		return -EINVAL;
 
 	ret =  _kgsl_sharedmem_page_alloc(memdesc, pagetable, size);
 	if (!ret)
@@ -759,7 +761,11 @@
 			    struct kgsl_pagetable *pagetable,
 			    size_t size)
 {
-	return _kgsl_sharedmem_page_alloc(memdesc, pagetable, PAGE_ALIGN(size));
+	size = PAGE_ALIGN(size);
+	if (size == 0)
+		return -EINVAL;
+
+	return _kgsl_sharedmem_page_alloc(memdesc, pagetable, size);
 }
 EXPORT_SYMBOL(kgsl_sharedmem_page_alloc_user);
 
@@ -769,6 +775,8 @@
 	int result = 0;
 
 	size = ALIGN(size, PAGE_SIZE);
+	if (size == 0)
+		return -EINVAL;
 
 	memdesc->size = size;
 	memdesc->ops = &kgsl_coherent_ops;
@@ -855,6 +863,9 @@
 			size_t size)
 {
 	size = ALIGN(size, PAGE_SIZE);
+	if (size == 0)
+		return -EINVAL;
+
 	return _kgsl_sharedmem_ebimem(memdesc, pagetable, size);
 }
 EXPORT_SYMBOL(kgsl_sharedmem_ebimem_user);
@@ -865,6 +876,9 @@
 {
 	int result;
 	size = ALIGN(size, 8192);
+	if (size == 0)
+		return -EINVAL;
+
 	result = _kgsl_sharedmem_ebimem(memdesc, pagetable, size);
 
 	if (result)
diff --git a/drivers/gpu/msm/kgsl_sync.c b/drivers/gpu/msm/kgsl_sync.c
index b7d7235..dc3ad21 100644
--- a/drivers/gpu/msm/kgsl_sync.c
+++ b/drivers/gpu/msm/kgsl_sync.c
@@ -11,6 +11,7 @@
  *
  */
 
+#include <linux/err.h>
 #include <linux/file.h>
 #include <linux/sched.h>
 #include <linux/slab.h>
@@ -281,3 +282,65 @@
 {
 	sync_timeline_destroy(context->timeline);
 }
+
+static void kgsl_sync_callback(struct sync_fence *fence,
+	struct sync_fence_waiter *waiter)
+{
+	struct kgsl_sync_fence_waiter *kwaiter =
+		(struct kgsl_sync_fence_waiter *) waiter;
+	kwaiter->func(kwaiter->priv);
+	sync_fence_put(kwaiter->fence);
+	kfree(kwaiter);
+}
+
+struct kgsl_sync_fence_waiter *kgsl_sync_fence_async_wait(int fd,
+	void (*func)(void *priv), void *priv)
+{
+	struct kgsl_sync_fence_waiter *kwaiter;
+	struct sync_fence *fence;
+	int status;
+
+	fence = sync_fence_fdget(fd);
+	if (fence == NULL)
+		return ERR_PTR(-EINVAL);
+
+	/* create the waiter */
+	kwaiter = kzalloc(sizeof(*kwaiter), GFP_ATOMIC);
+	if (kwaiter == NULL) {
+		sync_fence_put(fence);
+		return ERR_PTR(-ENOMEM);
+	}
+	kwaiter->fence = fence;
+	kwaiter->priv = priv;
+	kwaiter->func = func;
+	sync_fence_waiter_init((struct sync_fence_waiter *) kwaiter,
+		kgsl_sync_callback);
+
+	/* a non-zero status means error (< 0) or already signaled (> 0) */
+	status = sync_fence_wait_async(fence,
+		(struct sync_fence_waiter *) kwaiter);
+	if (status) {
+		kfree(kwaiter);
+		sync_fence_put(fence);
+		if (status < 0)
+			kwaiter = ERR_PTR(status);
+		else
+			kwaiter = NULL;
+	}
+
+	return kwaiter;
+}
+
+int kgsl_sync_fence_async_cancel(struct kgsl_sync_fence_waiter *kwaiter)
+{
+	if (kwaiter == NULL)
+		return 0;
+
+	if (sync_fence_cancel_async(kwaiter->fence,
+		(struct sync_fence_waiter *) kwaiter) == 0) {
+		sync_fence_put(kwaiter->fence);
+		kfree(kwaiter);
+		return 1;
+	}
+	return 0;
+}
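
A sketch of the calling convention, matching the return paths above:
ERR_PTR() on failure, NULL when the fence has already signaled (the callback
will never run, so the caller handles that case inline), otherwise a waiter
handle that can later be canceled. my_callback/my_data/my_cleanup are
placeholders:

	struct kgsl_sync_fence_waiter *waiter;

	waiter = kgsl_sync_fence_async_wait(sync_fd, my_callback, my_data);
	if (IS_ERR(waiter))
		return PTR_ERR(waiter);	/* bad fd or -ENOMEM */
	if (waiter == NULL)
		my_callback(my_data);	/* fence already signaled */
	/* ... */
	if (kgsl_sync_fence_async_cancel(waiter))
		my_cleanup(my_data);	/* canceled; callback will not fire */
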
diff --git a/drivers/gpu/msm/kgsl_sync.h b/drivers/gpu/msm/kgsl_sync.h
index 63adf06..275eaf0 100644
--- a/drivers/gpu/msm/kgsl_sync.h
+++ b/drivers/gpu/msm/kgsl_sync.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2012, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2012-2013, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -28,6 +28,13 @@
 	unsigned int timestamp;
 };
 
+struct kgsl_sync_fence_waiter {
+	struct sync_fence_waiter waiter;
+	struct sync_fence *fence;
+	void (*func)(void *priv);
+	void *priv;
+};
+
 #if defined(CONFIG_SYNC)
 struct sync_pt *kgsl_sync_pt_create(struct sync_timeline *timeline,
 	unsigned int timestamp);
@@ -39,6 +46,9 @@
 void kgsl_sync_timeline_signal(struct sync_timeline *timeline,
 	unsigned int timestamp);
 void kgsl_sync_timeline_destroy(struct kgsl_context *context);
+struct kgsl_sync_fence_waiter *kgsl_sync_fence_async_wait(int fd,
+	void (*func)(void *priv), void *priv);
+int kgsl_sync_fence_async_cancel(struct kgsl_sync_fence_waiter *waiter);
 #else
 static inline struct sync_pt
 *kgsl_sync_pt_create(struct sync_timeline *timeline, unsigned int timestamp)
@@ -72,6 +82,20 @@
 static inline void kgsl_sync_timeline_destroy(struct kgsl_context *context)
 {
 }
+
+static inline struct kgsl_sync_fence_waiter *
+kgsl_sync_fence_async_wait(int fd,
+	void (*func)(void *priv), void *priv)
+{
+	return NULL;
+}
+
+static inline int
+kgsl_sync_fence_async_cancel(struct kgsl_sync_fence_waiter *waiter)
+{
+	return 1;
+}
+
 #endif
 
 #endif /* __KGSL_SYNC_H */
diff --git a/drivers/gpu/msm/kgsl_trace.h b/drivers/gpu/msm/kgsl_trace.h
index f16f2b4..5f39b8b 100644
--- a/drivers/gpu/msm/kgsl_trace.h
+++ b/drivers/gpu/msm/kgsl_trace.h
@@ -37,14 +37,13 @@
 
 	TP_PROTO(struct kgsl_device *device,
 			int drawctxt_id,
-			struct kgsl_ibdesc *ibdesc,
-			int numibs,
+			struct kgsl_cmdbatch *cmdbatch,
 			int timestamp,
 			int flags,
 			int result,
 			unsigned int type),
 
-	TP_ARGS(device, drawctxt_id, ibdesc, numibs, timestamp, flags,
+	TP_ARGS(device, drawctxt_id, cmdbatch, timestamp, flags,
 		result, type),
 
 	TP_STRUCT__entry(
@@ -61,8 +60,8 @@
 	TP_fast_assign(
 		__assign_str(device_name, device->name);
 		__entry->drawctxt_id = drawctxt_id;
-		__entry->ibdesc_addr = ibdesc[0].gpuaddr;
-		__entry->numibs = numibs;
+		__entry->ibdesc_addr = cmdbatch->ibdesc[0].gpuaddr;
+		__entry->numibs = cmdbatch->ibcount;
 		__entry->timestamp = timestamp;
 		__entry->flags = flags;
 		__entry->result = result;
@@ -731,42 +730,46 @@
 );
 
 TRACE_EVENT(kgsl_register_event,
-		TP_PROTO(unsigned int id, unsigned int timestamp),
-		TP_ARGS(id, timestamp),
+		TP_PROTO(unsigned int id, unsigned int timestamp, void *func),
+		TP_ARGS(id, timestamp, func),
 		TP_STRUCT__entry(
 			__field(unsigned int, id)
 			__field(unsigned int, timestamp)
+			__field(void *, func)
 		),
 		TP_fast_assign(
 			__entry->id = id;
 			__entry->timestamp = timestamp;
+			__entry->func = func;
 		),
 		TP_printk(
-			"ctx=%u ts=%u",
-			__entry->id, __entry->timestamp)
+			"ctx=%u ts=%u cb=%pF",
+			__entry->id, __entry->timestamp, __entry->func)
 );
 
 TRACE_EVENT(kgsl_fire_event,
 		TP_PROTO(unsigned int id, unsigned int ts,
-			unsigned int type, unsigned int age),
-		TP_ARGS(id, ts, type, age),
+			unsigned int type, unsigned int age, void *func),
+		TP_ARGS(id, ts, type, age, func),
 		TP_STRUCT__entry(
 			__field(unsigned int, id)
 			__field(unsigned int, ts)
 			__field(unsigned int, type)
 			__field(unsigned int, age)
+			__field(void *, func)
 		),
 		TP_fast_assign(
 			__entry->id = id;
 			__entry->ts = ts;
 			__entry->type = type;
 			__entry->age = age;
+			__entry->func = func;
 		),
 		TP_printk(
-			"ctx=%u ts=%u type=%s age=%u",
+			"ctx=%u ts=%u type=%s age=%u cb=%pF",
 			__entry->id, __entry->ts,
 			__print_symbolic(__entry->type, KGSL_EVENT_TYPES),
-			__entry->age)
+			__entry->age, __entry->func)
 );
 
 TRACE_EVENT(kgsl_active_count,
diff --git a/drivers/gpu/msm/z180.c b/drivers/gpu/msm/z180.c
index 103a751..ae7aee0 100644
--- a/drivers/gpu/msm/z180.c
+++ b/drivers/gpu/msm/z180.c
@@ -156,7 +156,6 @@
 		.drv_log = KGSL_LOG_LEVEL_DEFAULT,
 		.mem_log = KGSL_LOG_LEVEL_DEFAULT,
 		.pwr_log = KGSL_LOG_LEVEL_DEFAULT,
-		.ft_log = KGSL_LOG_LEVEL_DEFAULT,
 		.pm_dump_enable = 0,
 	},
 	.cmdwin_lock = __SPIN_LOCK_INITIALIZER(device_2d1.cmdwin_lock),
@@ -362,7 +361,13 @@
 	return ts_diff < Z180_PACKET_COUNT;
 }
 
-static int z180_idle(struct kgsl_device *device)
+/**
+ * z180_idle() - Idle the 2D device
+ * @device: Pointer to the KGSL device struct for the Z180
+ *
+ * Wait until the Z180 submission queue is idle.
+ */
+int z180_idle(struct kgsl_device *device)
 {
 	int status = 0;
 	struct z180_device *z180_dev = Z180_DEVICE(device);
@@ -382,10 +387,8 @@
 int
 z180_cmdstream_issueibcmds(struct kgsl_device_private *dev_priv,
 			struct kgsl_context *context,
-			struct kgsl_ibdesc *ibdesc,
-			unsigned int numibs,
-			uint32_t *timestamp,
-			unsigned int ctrl)
+			struct kgsl_cmdbatch *cmdbatch,
+			uint32_t *timestamp)
 {
 	long result = 0;
 	unsigned int ofs        = PACKETSIZE_STATESTREAM * sizeof(unsigned int);
@@ -398,6 +401,22 @@
 	struct kgsl_pagetable *pagetable = dev_priv->process_priv->pagetable;
 	struct z180_device *z180_dev = Z180_DEVICE(device);
 	unsigned int sizedwords;
+	unsigned int numibs;
+	struct kgsl_ibdesc *ibdesc;
+
+	mutex_lock(&device->mutex);
+
+	result = kgsl_active_count_get(device);
+	if (result)
+		goto error_active_count;
+
+	if (cmdbatch == NULL) {
+		result = -EINVAL;
+		goto error;
+	}
+
+	ibdesc = cmdbatch->ibdesc;
+	numibs = cmdbatch->ibcount;
 
 	if (device->state & KGSL_STATE_HUNG) {
 		result = -EINVAL;
@@ -439,7 +458,7 @@
 		context->id, cmd, sizedwords);
 	/* context switch */
 	if ((context->id != (int)z180_dev->ringbuffer.prevctx) ||
-	    (ctrl & KGSL_CONTEXT_CTX_SWITCH)) {
+	    (cmdbatch->flags & KGSL_CONTEXT_CTX_SWITCH)) {
 		KGSL_CMD_INFO(device, "context switch %d -> %d\n",
 			context->id, z180_dev->ringbuffer.prevctx);
 		kgsl_mmu_setstate(&device->mmu, pagetable,
@@ -447,10 +466,13 @@
 		cnt = PACKETSIZE_STATESTREAM;
 		ofs = 0;
 	}
-	kgsl_setstate(&device->mmu,
+
+	result = kgsl_setstate(&device->mmu,
 			KGSL_MEMSTORE_GLOBAL,
 			kgsl_mmu_pt_get_flags(device->mmu.hwpagetable,
 			device->id));
+	if (result < 0)
+		goto error;
 
 	result = wait_event_interruptible_timeout(device->wait_queue,
 				  room_in_rb(z180_dev),
@@ -491,9 +513,12 @@
 	z180_cmdwindow_write(device, ADDR_VGV3_CONTROL, cmd);
 	z180_cmdwindow_write(device, ADDR_VGV3_CONTROL, 0);
 error:
+	kgsl_trace_issueibcmds(device, context->id, cmdbatch,
+		*timestamp, cmdbatch ? cmdbatch->flags : 0, result, 0);
 
-	kgsl_trace_issueibcmds(device, context->id, ibdesc, numibs,
-		*timestamp, ctrl, result, 0);
+	kgsl_active_count_put(device);
+error_active_count:
+	mutex_unlock(&device->mutex);
 
 	return (int)result;
 }
@@ -604,8 +629,12 @@
 
 static int z180_stop(struct kgsl_device *device)
 {
+	int ret;
+
 	device->ftbl->irqctrl(device, 0);
-	z180_idle(device);
+	ret = z180_idle(device);
+	if (ret)
+		return ret;
 
 	del_timer_sync(&device->idle_timer);
 
@@ -671,7 +700,7 @@
 	return status;
 }
 
-static unsigned int z180_isidle(struct kgsl_device *device)
+static bool z180_isidle(struct kgsl_device *device)
 {
 	struct z180_device *z180_dev = Z180_DEVICE(device);
 
@@ -836,9 +865,13 @@
 	if (msecs == -1)
 		msecs = Z180_IDLE_TIMEOUT;
 
-	mutex_unlock(&device->mutex);
-	status = z180_wait(device, context, timestamp, msecs);
-	mutex_lock(&device->mutex);
+	status = kgsl_active_count_get(device);
+	if (!status) {
+		mutex_unlock(&device->mutex);
+		status = z180_wait(device, context, timestamp, msecs);
+		mutex_lock(&device->mutex);
+		kgsl_active_count_put(device);
+	}
 
 	return status;
 }
@@ -884,23 +917,33 @@
 	return context;
 }
 
-static void
+static int
 z180_drawctxt_detach(struct kgsl_context *context)
 {
+	int ret;
 	struct kgsl_device *device;
 	struct z180_device *z180_dev;
 
 	device = context->device;
 	z180_dev = Z180_DEVICE(device);
 
+	ret = kgsl_active_count_get(device);
+	if (ret)
+		return ret;
+
 	z180_idle(device);
 
 	if (z180_dev->ringbuffer.prevctx == context->id) {
 		z180_dev->ringbuffer.prevctx = Z180_INVALID_CONTEXT;
 		device->mmu.hwpagetable = device->mmu.defaultpagetable;
+
+		/* Ignore the result - we are going down anyway */
 		kgsl_setstate(&device->mmu, KGSL_MEMSTORE_GLOBAL,
 				KGSL_MMUFLAGS_PTUPDATE);
 	}
+
+	kgsl_active_count_put(device);
+	return 0;
 }
 
 static void
@@ -974,6 +1017,7 @@
 	.irqctrl = z180_irqctrl,
 	.gpuid = z180_gpuid,
 	.irq_handler = z180_irq_handler,
+	.drain = z180_idle, /* drain == idle for the z180 */
 	/* Optional functions */
 	.drawctxt_create = z180_drawctxt_create,
 	.drawctxt_detach = z180_drawctxt_detach,
diff --git a/drivers/gpu/msm/z180.h b/drivers/gpu/msm/z180.h
index 1be0870..a36e92d 100644
--- a/drivers/gpu/msm/z180.h
+++ b/drivers/gpu/msm/z180.h
@@ -45,5 +45,6 @@
 };
 
 int z180_dump(struct kgsl_device *, int);
+int z180_idle(struct kgsl_device *);
 
 #endif /* __Z180_H */
diff --git a/drivers/gpu/msm/z180_postmortem.c b/drivers/gpu/msm/z180_postmortem.c
index 5d929cf..bc53c0e 100644
--- a/drivers/gpu/msm/z180_postmortem.c
+++ b/drivers/gpu/msm/z180_postmortem.c
@@ -58,6 +58,8 @@
 	unsigned int i;
 	unsigned int reg_val;
 
+	z180_idle(device);
+
 	KGSL_LOG_DUMP(device, "Z180 Register Dump\n");
 	for (i = 0; i < ARRAY_SIZE(regs_to_dump); i++) {
 		kgsl_regread(device,
diff --git a/drivers/media/platform/msm/vidc/msm_smem.h b/drivers/media/platform/msm/vidc/msm_smem.h
index dc384bc..7bd6443 100644
--- a/drivers/media/platform/msm/vidc/msm_smem.h
+++ b/drivers/media/platform/msm/vidc/msm_smem.h
@@ -28,6 +28,10 @@
 	SMEM_SECURE = ION_FLAG_SECURE,
 };
 
+/* NOTE: if you change this enum you MUST update the
+ * "buffer-type-tz-usage-table" for any affected target
+ * in arch/arm/boot/dts/<arch>.dtsi
+ */
 enum hal_buffer {
 	HAL_BUFFER_INPUT = 0x1,
 	HAL_BUFFER_OUTPUT = 0x2,
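
To make the NOTE above concrete, the dtsi usage tables pair a bitmask of
these enum values with a TZ usage ID; the numbers below are illustrative
only, not taken from any particular target:

/*
 * Illustrative only: with HAL_BUFFER_INPUT = 0x1 and HAL_BUFFER_OUTPUT = 0x2,
 * a usage-table entry of <0x3 0x2> would tag both buffer types with TZ usage
 * ID 2. Inserting a new value into enum hal_buffer shifts every higher bit,
 * which is why the "buffer-type-tz-usage-table" masks in the dtsi files must
 * be updated in lockstep with this enum.
 */
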
diff --git a/drivers/net/wireless/wcnss/wcnss_wlan.c b/drivers/net/wireless/wcnss/wcnss_wlan.c
index 33d11b1..8dc30ee 100644
--- a/drivers/net/wireless/wcnss/wcnss_wlan.c
+++ b/drivers/net/wireless/wcnss/wcnss_wlan.c
@@ -2032,19 +2032,17 @@
 static int wcnss_node_open(struct inode *inode, struct file *file)
 {
 	struct platform_device *pdev;
+	int rc = 0;
 
 	if (!penv)
 		return -EFAULT;
 
-	/* first open is only to trigger WCNSS platform driver */
 	if (!penv->triggered) {
 		pr_info(DEVICE " triggered by userspace\n");
 		pdev = penv->pdev;
-		return wcnss_trigger_config(pdev);
-
-	} else if (penv->device_opened) {
-		pr_info(DEVICE " already opened\n");
-		return -EBUSY;
+		rc = wcnss_trigger_config(pdev);
+		if (rc)
+			return -EFAULT;
 	}
 
 	mutex_lock(&penv->dev_lock);
@@ -2055,7 +2053,7 @@
 	penv->device_opened = 1;
 	mutex_unlock(&penv->dev_lock);
 
-	return 0;
+	return rc;
 }
 
 static ssize_t wcnss_wlan_read(struct file *fp, char __user
diff --git a/include/linux/kref.h b/include/linux/kref.h
index 9c07dce..aa5acc26 100644
--- a/include/linux/kref.h
+++ b/include/linux/kref.h
@@ -93,4 +93,26 @@
 {
 	return kref_sub(kref, 1, release);
 }
+
+
+/**
+ * kref_get_unless_zero - Increment refcount for object unless it is zero.
+ * @kref: object.
+ *
+ * Return non-zero if the increment succeeded. Otherwise return 0.
+ *
+ * This function is intended to simplify locking around refcounting for
+ * objects that can be looked up from a lookup structure, and which are
+ * removed from that lookup structure in the object destructor.
+ * Operations on such objects require at least a read lock around
+ * lookup + kref_get, and a write lock around kref_put + remove from lookup
+ * structure. Furthermore, RCU implementations become extremely tricky.
+ * With a lookup followed by a kref_get_unless_zero *with return value check*,
+ * locking in the kref_put path can be deferred to the actual removal from
+ * the lookup structure and RCU lookups become trivial.
+ */
+static inline int __must_check kref_get_unless_zero(struct kref *kref)
+{
+	return atomic_add_unless(&kref->refcount, 1, 0);
+}
 #endif /* _KREF_H_ */
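
As a minimal sketch of the lookup pattern the comment above describes (the
object type, list, and lock names are illustrative assumptions):

#include <linux/kref.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

struct example_obj {
	struct kref refcount;
	int id;
	struct list_head node;		/* linked into example_list */
};

static LIST_HEAD(example_list);
static DEFINE_SPINLOCK(example_lock);	/* protects example_list */

static void example_release(struct kref *kref)
{
	struct example_obj *obj =
		container_of(kref, struct example_obj, refcount);

	/* called with example_lock held, from example_put() */
	list_del(&obj->node);
	kfree(obj);
}

/* Lookup: only the list lock is needed; dying objects are skipped
 * because kref_get_unless_zero() fails once the count has hit zero.
 */
static struct example_obj *example_find(int id)
{
	struct example_obj *obj;

	spin_lock(&example_lock);
	list_for_each_entry(obj, &example_list, node) {
		if (obj->id == id &&
		    kref_get_unless_zero(&obj->refcount)) {
			spin_unlock(&example_lock);
			return obj;
		}
	}
	spin_unlock(&example_lock);
	return NULL;
}

/* Put: holding the lock makes the zero transition and the removal from
 * the lookup structure atomic with respect to example_find().
 */
static void example_put(struct example_obj *obj)
{
	spin_lock(&example_lock);
	kref_put(&obj->refcount, example_release);
	spin_unlock(&example_lock);
}
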
diff --git a/include/linux/msm_kgsl.h b/include/linux/msm_kgsl.h
index 87047d2..f74fcbe 100644
--- a/include/linux/msm_kgsl.h
+++ b/include/linux/msm_kgsl.h
@@ -20,7 +20,10 @@
 #define KGSL_CONTEXT_TRASH_STATE	0x00000020
 #define KGSL_CONTEXT_PER_CONTEXT_TS	0x00000040
 #define KGSL_CONTEXT_USER_GENERATED_TS	0x00000080
+#define KGSL_CONTEXT_END_OF_FRAME	0x00000100
+
 #define KGSL_CONTEXT_NO_FAULT_TOLERANCE 0x00000200
+#define KGSL_CONTEXT_SYNC               0x00000400
 /* bits [12:15] are reserved for future use */
 #define KGSL_CONTEXT_TYPE_MASK          0x01F00000
 #define KGSL_CONTEXT_TYPE_SHIFT         20
@@ -283,7 +286,7 @@
 #define IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID \
 	_IOW(KGSL_IOC_TYPE, 0x7, struct kgsl_device_waittimestamp_ctxtid)
 
-/* issue indirect commands to the GPU.
+/* DEPRECATED: issue indirect commands to the GPU.
  * drawctxt_id must have been created with IOCTL_KGSL_DRAWCTXT_CREATE
  * ibaddr and sizedwords must specify a subset of a buffer created
  * with IOCTL_KGSL_SHAREDMEM_FROM_PMEM
@@ -291,6 +294,9 @@
  * timestamp is a returned counter value which can be passed to
  * other ioctls to determine when the commands have been executed by
  * the GPU.
+ *
+ * This function is deprecated - consider using IOCTL_KGSL_SUBMIT_COMMANDS
+ * instead
  */
 struct kgsl_ringbuffer_issueibcmds {
 	unsigned int drawctxt_id;
@@ -805,6 +811,77 @@
 #define IOCTL_KGSL_GPUMEM_SYNC_CACHE_BULK \
 	_IOWR(KGSL_IOC_TYPE, 0x3C, struct kgsl_gpumem_sync_cache_bulk)
 
+/*
+ * struct kgsl_cmd_syncpoint_timestamp
+ * @context_id: ID of a KGSL context
+ * @timestamp: GPU timestamp
+ *
+ * This structure defines a syncpoint comprising a context/timestamp pair. A
+ * list of these may be passed by IOCTL_KGSL_SUBMIT_COMMANDS to define
+ * dependencies that must be met before the command can be submitted to the
+ * hardware.
+ */
+struct kgsl_cmd_syncpoint_timestamp {
+	unsigned int context_id;
+	unsigned int timestamp;
+};
+
+#define KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP 0
+
+struct kgsl_cmd_syncpoint_fence {
+	int fd;
+};
+
+#define KGSL_CMD_SYNCPOINT_TYPE_FENCE 1
+
+/**
+ * struct kgsl_cmd_syncpoint - Define a sync point for a command batch
+ * @type: type of sync point defined here
+ * @priv: Pointer to the type specific buffer
+ * @size: Size of the type specific buffer
+ *
+ * This structure contains pointers defining a specific command sync point.
+ * The pointer and size should point to a type appropriate structure.
+ */
+struct kgsl_cmd_syncpoint {
+	int type;
+	void __user *priv;
+	unsigned int size;
+};
+
+/**
+ * struct kgsl_submit_commands - Argument to IOCTL_KGSL_SUBMIT_COMMANDS
+ * @context_id: KGSL context ID that owns the commands
+ * @flags: KGSL_CONTEXT_* flags for the command batch
+ * @cmdlist: User pointer to a list of kgsl_ibdesc structures
+ * @numcmds: Number of commands listed in cmdlist
+ * @synclist: User pointer to a list of kgsl_cmd_syncpoint structures
+ * @numsyncs: Number of sync points listed in synclist
+ * @timestamp: On entry, a user-defined timestamp; on exit, the timestamp
+ * assigned to the command batch
+ *
+ * This structure specifies a command to send to the GPU hardware.  This is
+ * similar to kgsl_issueibcmds except that it doesn't support the legacy way to
+ * submit IB lists and it adds sync points to block the IB until the
+ * dependencies are satisfied.  This entry point is the new and preferred way
+ * to submit commands to the GPU.
+ */
+struct kgsl_submit_commands {
+	unsigned int context_id;
+	unsigned int flags;
+	struct kgsl_ibdesc __user *cmdlist;
+	unsigned int numcmds;
+	struct kgsl_cmd_syncpoint __user *synclist;
+	unsigned int numsyncs;
+	unsigned int timestamp;
+/* private: reserved for future use */
+	unsigned int __pad[4];
+};
+
+#define IOCTL_KGSL_SUBMIT_COMMANDS \
+	_IOWR(KGSL_IOC_TYPE, 0x3D, struct kgsl_submit_commands)
+
 #ifdef __KERNEL__
 #ifdef CONFIG_MSM_KGSL_DRM
 int kgsl_gem_obj_addr(int drm_fd, int handle, unsigned long *start,
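
For orientation, a sketch of how userspace might drive the new ioctl; the
wrapper name and the abbreviated error handling are assumptions, not part of
the interface definition above:

#include <string.h>
#include <sys/ioctl.h>
/* plus linux/msm_kgsl.h, i.e. this header once installed */

/* Hypothetical wrapper: submit one IB that must wait on a sync fence fd. */
static int example_submit(int kgsl_fd, unsigned int ctxt_id,
			  struct kgsl_ibdesc *ib, int fence_fd,
			  unsigned int *timestamp)
{
	struct kgsl_cmd_syncpoint_fence fence = { .fd = fence_fd };
	struct kgsl_cmd_syncpoint sync = {
		.type = KGSL_CMD_SYNCPOINT_TYPE_FENCE,
		.priv = &fence,
		.size = sizeof(fence),
	};
	struct kgsl_submit_commands cmds;

	memset(&cmds, 0, sizeof(cmds));
	cmds.context_id = ctxt_id;
	cmds.cmdlist = ib;
	cmds.numcmds = 1;
	cmds.synclist = &sync;
	cmds.numsyncs = 1;

	if (ioctl(kgsl_fd, IOCTL_KGSL_SUBMIT_COMMANDS, &cmds))
		return -1;
	*timestamp = cmds.timestamp;	/* assigned by the driver on exit */
	return 0;
}
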
diff --git a/sound/soc/msm/qdsp6v2/msm-pcm-q6-v2.c b/sound/soc/msm/qdsp6v2/msm-pcm-q6-v2.c
index 73dd9df..dc41948 100644
--- a/sound/soc/msm/qdsp6v2/msm-pcm-q6-v2.c
+++ b/sound/soc/msm/qdsp6v2/msm-pcm-q6-v2.c
@@ -396,6 +396,18 @@
 									ret);
 		}
 	}
+	ret = snd_pcm_hw_constraint_step(runtime, 0,
+		SNDRV_PCM_HW_PARAM_PERIOD_BYTES, 32);
+	if (ret < 0) {
+		pr_err("constraint for period bytes step ret = %d\n",
+								ret);
+	}
+	ret = snd_pcm_hw_constraint_step(runtime, 0,
+		SNDRV_PCM_HW_PARAM_BUFFER_BYTES, 32);
+	if (ret < 0) {
+		pr_err("constraint for buffer bytes step ret = %d\n",
+								ret);
+	}
 
 	prtd->dsp_cnt = 0;
 	prtd->set_channel_map = false;
diff --git a/sound/soc/msm/qdsp6v2/q6adm.c b/sound/soc/msm/qdsp6v2/q6adm.c
index 27c74ce..3ee6f6e 100644
--- a/sound/soc/msm/qdsp6v2/q6adm.c
+++ b/sound/soc/msm/qdsp6v2/q6adm.c
@@ -1144,8 +1144,8 @@
 			open.dev_channel_mapping[1] = PCM_CHANNEL_FR;
 		} else if (channel_mode == 3)	{
 			open.dev_channel_mapping[0] = PCM_CHANNEL_FL;
-			open.dev_channel_mapping[0] = PCM_CHANNEL_FR;
-			open.dev_channel_mapping[1] = PCM_CHANNEL_FC;
+			open.dev_channel_mapping[1] = PCM_CHANNEL_FR;
+			open.dev_channel_mapping[2] = PCM_CHANNEL_FC;
 		} else if (channel_mode == 4) {
 			open.dev_channel_mapping[0] = PCM_CHANNEL_FL;
 			open.dev_channel_mapping[1] = PCM_CHANNEL_FR;