Merge "msm: kgsl: Add the per context preemption buffer"
diff --git a/drivers/gpu/msm/Makefile b/drivers/gpu/msm/Makefile
index f513207..0058226 100644
--- a/drivers/gpu/msm/Makefile
+++ b/drivers/gpu/msm/Makefile
@@ -38,6 +38,7 @@
 	adreno_a6xx_snapshot.o \
 	adreno_a4xx_preempt.o \
 	adreno_a5xx_preempt.o \
+	adreno_a6xx_preempt.o \
 	adreno_sysfs.o \
 	adreno.o \
 	adreno_cp_parser.o \
diff --git a/drivers/gpu/msm/a6xx_reg.h b/drivers/gpu/msm/a6xx_reg.h
index 58ef5ee..f4552b6 100644
--- a/drivers/gpu/msm/a6xx_reg.h
+++ b/drivers/gpu/msm/a6xx_reg.h
@@ -70,6 +70,15 @@
 #define A6XX_CP_ADDR_MODE_CNTL           0x842
 #define A6XX_CP_PROTECT_CNTL             0x84F
 #define A6XX_CP_PROTECT_REG              0x850
+#define A6XX_CP_CONTEXT_SWITCH_CNTL      0x8A0
+#define A6XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO   0x8A1
+#define A6XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI   0x8A2
+#define A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_LO   0x8A3
+#define A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_HI   0x8A4
+#define A6XX_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_LO   0x8A5
+#define A6XX_CP_CONTEXT_SWITCH_PRIV_SECURE_RESTORE_ADDR_HI   0x8A6
+#define A6XX_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_LO   0x8A7
+#define A6XX_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_HI   0x8A8
 #define A6XX_CP_PERFCTR_CP_SEL_0         0x8D0
 #define A6XX_CP_PERFCTR_CP_SEL_1         0x8D1
 #define A6XX_CP_PERFCTR_CP_SEL_2         0x8D2
@@ -590,6 +599,7 @@
 #define A6XX_RB_PERFCTR_CMP_SEL_1           0x8E2D
 #define A6XX_RB_PERFCTR_CMP_SEL_2           0x8E2E
 #define A6XX_RB_PERFCTR_CMP_SEL_3           0x8E2F
+#define A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE 0x8E50
 
 /* PC registers */
 #define A6XX_PC_DBG_ECO_CNTL                0x9E00
diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h
index 7a6581c..c7e3ad7 100644
--- a/drivers/gpu/msm/adreno.h
+++ b/drivers/gpu/msm/adreno.h
@@ -845,11 +845,14 @@
 				unsigned int *cmds,
 				struct kgsl_context *context);
 	int (*preemption_yield_enable)(unsigned int *);
+	unsigned int (*preemption_set_marker)(unsigned int *cmds, int start);
 	unsigned int (*preemption_post_ibsubmit)(
 				struct adreno_device *adreno_dev,
 				unsigned int *cmds);
 	int (*preemption_init)(struct adreno_device *);
 	void (*preemption_schedule)(struct adreno_device *);
+	int (*preemption_context_init)(struct kgsl_context *);
+	void (*preemption_context_destroy)(struct kgsl_context *);
 	void (*enable_64bit)(struct adreno_device *);
 	void (*clk_set_options)(struct adreno_device *,
 				const char *, struct clk *, bool on);
diff --git a/drivers/gpu/msm/adreno_a6xx.c b/drivers/gpu/msm/adreno_a6xx.c
index 3cbb68e..ad0ce44 100644
--- a/drivers/gpu/msm/adreno_a6xx.c
+++ b/drivers/gpu/msm/adreno_a6xx.c
@@ -29,9 +29,6 @@
 #include "kgsl_gmu.h"
 #include "kgsl_trace.h"
 
-#define A6XX_CP_RB_CNTL_DEFAULT (((ilog2(4) << 8) & 0x1F00) | \
-		(ilog2(KGSL_RB_DWORDS >> 1) & 0x3F))
-
 #define MIN_HBB		13
 
 #define A6XX_LLC_NUM_GPU_SCIDS		5
@@ -482,6 +479,12 @@
 	if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_TWO_PASS_USE_WFI))
 		kgsl_regrmw(device, A6XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
 
+	/* Enable the GMEM save/restore feature for preemption */
+	if (adreno_is_preemption_enabled(adreno_dev))
+		kgsl_regwrite(device, A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE,
+			0x1);
+
+	a6xx_preemption_start(adreno_dev);
 	a6xx_protect_init(adreno_dev);
 }
 
@@ -612,6 +615,70 @@
 }
 
 /*
+ * Follow the ME_INIT sequence with a preemption yield to allow the GPU to move
+ * to a different ringbuffer, if desired
+ */
+static int _preemption_init(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb, unsigned int *cmds,
+		struct kgsl_context *context)
+{
+	unsigned int *cmds_orig = cmds;
+
+	/* Turn CP protection OFF */
+	*cmds++ = cp_type7_packet(CP_SET_PROTECTED_MODE, 1);
+	*cmds++ = 0;
+
+	*cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 6);
+	*cmds++ = 1;
+	cmds += cp_gpuaddr(adreno_dev, cmds,
+			rb->preemption_desc.gpuaddr);
+
+	*cmds++ = 2;
+	cmds += cp_gpuaddr(adreno_dev, cmds, 0);
+
+	/* Turn CP protection ON */
+	*cmds++ = cp_type7_packet(CP_SET_PROTECTED_MODE, 1);
+	*cmds++ = 1;
+
+	*cmds++ = cp_type7_packet(CP_CONTEXT_SWITCH_YIELD, 4);
+	cmds += cp_gpuaddr(adreno_dev, cmds, 0x0);
+	*cmds++ = 0;
+	/* generate interrupt on preemption completion */
+	*cmds++ = 0;
+
+	return cmds - cmds_orig;
+}
+
+static int a6xx_post_start(struct adreno_device *adreno_dev)
+{
+	int ret;
+	unsigned int *cmds, *start;
+	struct adreno_ringbuffer *rb = adreno_dev->cur_rb;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (!adreno_is_preemption_enabled(adreno_dev))
+		return 0;
+
+	cmds = adreno_ringbuffer_allocspace(rb, 42);
+	if (IS_ERR(cmds)) {
+		KGSL_DRV_ERR(device, "error allocating preemption init cmds");
+		return PTR_ERR(cmds);
+	}
+	start = cmds;
+
+	cmds += _preemption_init(adreno_dev, rb, cmds, NULL);
+
+	rb->_wptr = rb->_wptr - (42 - (cmds - start));
+
+	ret = adreno_ringbuffer_submit_spin(rb, NULL, 2000);
+	if (ret)
+		adreno_spin_idle_debug(adreno_dev,
+			"hw preemption initialization failed to idle\n");
+
+	return ret;
+}
+
+/*
  * a6xx_rb_start() - Start the ringbuffer
  * @adreno_dev: Pointer to adreno device
  * @start_type: Warm or cold start
@@ -651,7 +718,11 @@
 		return ret;
 
 	/* GPU comes up in secured mode, make it unsecured by default */
-	return adreno_set_unsecured_mode(adreno_dev, rb);
+	ret = adreno_set_unsecured_mode(adreno_dev, rb);
+	if (ret)
+		return ret;
+
+	return a6xx_post_start(adreno_dev);
 }
 
 static int _load_firmware(struct kgsl_device *device, const char *fwfile,
@@ -2086,7 +2157,7 @@
 	/* 6 - RBBM_ATB_ASYNC_OVERFLOW */
 	ADRENO_IRQ_CALLBACK(a6xx_err_callback),
 	ADRENO_IRQ_CALLBACK(NULL), /* 7 - GPC_ERR */
-	ADRENO_IRQ_CALLBACK(NULL),/* 8 - CP_SW */
+	ADRENO_IRQ_CALLBACK(a6xx_preemption_callback),/* 8 - CP_SW */
 	ADRENO_IRQ_CALLBACK(a6xx_cp_hw_err_callback), /* 9 - CP_HW_ERROR */
 	ADRENO_IRQ_CALLBACK(NULL),  /* 10 - CP_CCU_FLUSH_DEPTH_TS */
 	ADRENO_IRQ_CALLBACK(NULL), /* 11 - CP_CCU_FLUSH_COLOR_TS */
@@ -2580,6 +2651,11 @@
 	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BUFSZ, A6XX_CP_IB2_REM_SIZE),
 	ADRENO_REG_DEFINE(ADRENO_REG_CP_ROQ_ADDR, A6XX_CP_ROQ_DBG_ADDR),
 	ADRENO_REG_DEFINE(ADRENO_REG_CP_ROQ_DATA, A6XX_CP_ROQ_DBG_DATA),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_PREEMPT, A6XX_CP_CONTEXT_SWITCH_CNTL),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_LO,
+			A6XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_HI,
+			A6XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI),
 	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_STATUS, A6XX_RBBM_STATUS),
 	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_STATUS3, A6XX_RBBM_STATUS3),
 	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_CTL, A6XX_RBBM_PERFCTR_CNTL),
@@ -2693,4 +2769,11 @@
 	.iommu_fault_block = a6xx_iommu_fault_block,
 	.reset = a6xx_reset,
 	.soft_reset = a6xx_soft_reset,
+	.preemption_pre_ibsubmit = a6xx_preemption_pre_ibsubmit,
+	.preemption_post_ibsubmit = a6xx_preemption_post_ibsubmit,
+	.preemption_init = a6xx_preemption_init,
+	.preemption_schedule = a6xx_preemption_schedule,
+	.preemption_set_marker = a6xx_preemption_set_marker,
+	.preemption_context_init = a6xx_preemption_context_init,
+	.preemption_context_destroy = a6xx_preemption_context_destroy,
 };
diff --git a/drivers/gpu/msm/adreno_a6xx.h b/drivers/gpu/msm/adreno_a6xx.h
index 4b96f56..ddf89d6 100644
--- a/drivers/gpu/msm/adreno_a6xx.h
+++ b/drivers/gpu/msm/adreno_a6xx.h
@@ -23,10 +23,93 @@
 #define CP_CLUSTER_SP_PS	0x4
 #define CP_CLUSTER_PS		0x5
 
+/**
+ * struct a6xx_cp_preemption_record - CP context record for
+ * preemption.
+ * @magic: (00) Value at this offset must be equal to
+ * A6XX_CP_CTXRECORD_MAGIC_REF.
+ * @info: (04) Type of record. Written non-zero (usually) by CP.
+ * we must set to zero for all ringbuffers.
+ * @errno: (08) Error code. Initialize this to A6XX_CP_CTXRECORD_ERROR_NONE.
+ * CP will update to another value if a preemption error occurs.
+ * @data: (12) DATA field in YIELD and SET_MARKER packets.
+ * Written by CP when switching out. Not used on switch-in. Initialized to 0.
+ * @cntl: (16) RB_CNTL, saved and restored by CP. We must initialize this.
+ * @rptr: (20) RB_RPTR, saved and restored by CP. We must initialize this.
+ * @wptr: (24) RB_WPTR, saved and restored by CP. We must initialize this.
+ * @_pad28: (28) Reserved/padding.
+ * @rptr_addr: (32) RB_RPTR_ADDR_LO|HI saved and restored. We must initialize.
+ * rbase: (40) RB_BASE_LO|HI saved and restored.
+ * counter: (48) Pointer to preemption counter.
+ */
+struct a6xx_cp_preemption_record {
+	uint32_t  magic;
+	uint32_t  info;
+	uint32_t  errno;
+	uint32_t  data;
+	uint32_t  cntl;
+	uint32_t  rptr;
+	uint32_t  wptr;
+	uint32_t  _pad28;
+	uint64_t  rptr_addr;
+	uint64_t  rbase;
+	uint64_t  counter;
+};
+
+/**
+ * struct a6xx_cp_smmu_info - CP preemption SMMU info.
+ * @magic: (00) The value at this offset must be equal to
+ * A6XX_CP_SMMU_INFO_MAGIC_REF.
+ * @_pad4: (04) Reserved/padding
+ * @ttbr0: (08) Base address of the page table for the
+ * incoming context.
+ * @context_idr: (16) Context Identification Register value.
+ */
+struct a6xx_cp_smmu_info {
+	uint32_t  magic;
+	uint32_t  _pad4;
+	uint64_t  ttbr0;
+	uint32_t  asid;
+	uint32_t  context_idr;
+};
+
+#define A6XX_CP_SMMU_INFO_MAGIC_REF     0x3618CDA3UL
+
+#define A6XX_CP_CTXRECORD_MAGIC_REF     0xAE399D6EUL
+/* Size of each CP preemption record */
+#define A6XX_CP_CTXRECORD_SIZE_IN_BYTES     (2112 * 1024)
+/* Size of the preemption counter block (in bytes) */
+#define A6XX_CP_CTXRECORD_PREEMPTION_COUNTER_SIZE   (16 * 4)
+/* Size of the user context record block (in bytes) */
+#define A6XX_CP_CTXRECORD_USER_RESTORE_SIZE (192 * 1024)
+/* Size of the performance counter save/restore block (in bytes) */
+#define A6XX_CP_PERFCOUNTER_SAVE_RESTORE_SIZE   (4 * 1024)
+
+#define A6XX_CP_RB_CNTL_DEFAULT (((ilog2(4) << 8) & 0x1F00) | \
+		(ilog2(KGSL_RB_DWORDS >> 1) & 0x3F))
+
+/* Preemption functions */
+void a6xx_preemption_trigger(struct adreno_device *adreno_dev);
+void a6xx_preemption_schedule(struct adreno_device *adreno_dev);
+void a6xx_preemption_start(struct adreno_device *adreno_dev);
+int a6xx_preemption_init(struct adreno_device *adreno_dev);
+
+unsigned int a6xx_preemption_post_ibsubmit(struct adreno_device *adreno_dev,
+		unsigned int *cmds);
+unsigned int a6xx_preemption_pre_ibsubmit(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb,
+		unsigned int *cmds, struct kgsl_context *context);
+
+unsigned int a6xx_preemption_set_marker(unsigned int *cmds, int start);
+
+void a6xx_preemption_callback(struct adreno_device *adreno_dev, int bit);
+
+int a6xx_preemption_context_init(struct kgsl_context *context);
+
+void a6xx_preemption_context_destroy(struct kgsl_context *context);
 
 void a6xx_snapshot(struct adreno_device *adreno_dev,
 		struct kgsl_snapshot *snapshot);
 
 void a6xx_crashdump_init(struct adreno_device *adreno_dev);
-
 #endif
diff --git a/drivers/gpu/msm/adreno_a6xx_preempt.c b/drivers/gpu/msm/adreno_a6xx_preempt.c
new file mode 100644
index 0000000..00325e5
--- /dev/null
+++ b/drivers/gpu/msm/adreno_a6xx_preempt.c
@@ -0,0 +1,654 @@
+/* Copyright (c) 2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include "adreno.h"
+#include "adreno_a6xx.h"
+#include "a6xx_reg.h"
+#include "adreno_trace.h"
+#include "adreno_pm4types.h"
+
+#define PREEMPT_RECORD(_field) \
+		offsetof(struct a6xx_cp_preemption_record, _field)
+
+#define PREEMPT_SMMU_RECORD(_field) \
+		offsetof(struct a6xx_cp_smmu_info, _field)
+
+enum {
+	SET_PSEUDO_REGISTER_SAVE_REGISTER_SMMU_INFO = 0,
+	SET_PSEUDO_REGISTER_SAVE_REGISTER_PRIV_NON_SECURE_SAVE_ADDR,
+	SET_PSEUDO_REGISTER_SAVE_REGISTER_PRIV_SECURE_SAVE_ADDR,
+	SET_PSEUDO_REGISTER_SAVE_REGISTER_NON_PRIV_SAVE_ADDR,
+	SET_PSEUDO_REGISTER_SAVE_REGISTER_COUNTER,
+};
+
+static void _update_wptr(struct adreno_device *adreno_dev, bool reset_timer)
+{
+	struct adreno_ringbuffer *rb = adreno_dev->cur_rb;
+	unsigned int wptr;
+	unsigned long flags;
+
+	spin_lock_irqsave(&rb->preempt_lock, flags);
+
+	adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_WPTR, &wptr);
+
+	if (wptr != rb->wptr) {
+		adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_WPTR,
+			rb->wptr);
+		/*
+		 * In case something got submitted while preemption was on
+		 * going, reset the timer.
+		 */
+		reset_timer = true;
+	}
+
+	if (reset_timer)
+		rb->dispatch_q.expires = jiffies +
+			msecs_to_jiffies(adreno_drawobj_timeout);
+
+	spin_unlock_irqrestore(&rb->preempt_lock, flags);
+}
+
+static inline bool adreno_move_preempt_state(struct adreno_device *adreno_dev,
+	enum adreno_preempt_states old, enum adreno_preempt_states new)
+{
+	return (atomic_cmpxchg(&adreno_dev->preempt.state, old, new) == old);
+}
+
+static void _a6xx_preemption_done(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int status;
+
+	/*
+	 * In the very unlikely case that the power is off, do nothing - the
+	 * state will be reset on power up and everybody will be happy
+	 */
+
+	if (!kgsl_state_is_awake(device))
+		return;
+
+	adreno_readreg(adreno_dev, ADRENO_REG_CP_PREEMPT, &status);
+
+	if (status & 0x1) {
+		KGSL_DRV_ERR(device,
+			"Preemption not complete: status=%X cur=%d R/W=%X/%X next=%d R/W=%X/%X\n",
+			status, adreno_dev->cur_rb->id,
+			adreno_get_rptr(adreno_dev->cur_rb),
+			adreno_dev->cur_rb->wptr, adreno_dev->next_rb->id,
+			adreno_get_rptr(adreno_dev->next_rb),
+			adreno_dev->next_rb->wptr);
+
+		/* Set a fault and restart */
+		adreno_set_gpu_fault(adreno_dev, ADRENO_PREEMPT_FAULT);
+		adreno_dispatcher_schedule(device);
+
+		return;
+	}
+
+	del_timer_sync(&adreno_dev->preempt.timer);
+
+	trace_adreno_preempt_done(adreno_dev->cur_rb, adreno_dev->next_rb);
+
+	/* Clean up all the bits */
+	adreno_dev->prev_rb = adreno_dev->cur_rb;
+	adreno_dev->cur_rb = adreno_dev->next_rb;
+	adreno_dev->next_rb = NULL;
+
+	/* Update the wptr for the new command queue */
+	_update_wptr(adreno_dev, true);
+
+	/* Update the dispatcher timer for the new command queue */
+	mod_timer(&adreno_dev->dispatcher.timer,
+		adreno_dev->cur_rb->dispatch_q.expires);
+
+	/* Clear the preempt state */
+	adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
+}
+
+static void _a6xx_preemption_fault(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int status;
+
+	/*
+	 * If the power is on check the preemption status one more time - if it
+	 * was successful then just transition to the complete state
+	 */
+	if (kgsl_state_is_awake(device)) {
+		adreno_readreg(adreno_dev, ADRENO_REG_CP_PREEMPT, &status);
+
+		if (status == 0) {
+			adreno_set_preempt_state(adreno_dev,
+				ADRENO_PREEMPT_COMPLETE);
+
+			adreno_dispatcher_schedule(device);
+			return;
+		}
+	}
+
+	KGSL_DRV_ERR(device,
+		"Preemption timed out: cur=%d R/W=%X/%X, next=%d R/W=%X/%X\n",
+		adreno_dev->cur_rb->id,
+		adreno_get_rptr(adreno_dev->cur_rb), adreno_dev->cur_rb->wptr,
+		adreno_dev->next_rb->id,
+		adreno_get_rptr(adreno_dev->next_rb),
+		adreno_dev->next_rb->wptr);
+
+	adreno_set_gpu_fault(adreno_dev, ADRENO_PREEMPT_FAULT);
+	adreno_dispatcher_schedule(device);
+}
+
+static void _a6xx_preemption_worker(struct work_struct *work)
+{
+	struct adreno_preemption *preempt = container_of(work,
+		struct adreno_preemption, work);
+	struct adreno_device *adreno_dev = container_of(preempt,
+		struct adreno_device, preempt);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	/* Need to take the mutex to make sure that the power stays on */
+	mutex_lock(&device->mutex);
+
+	if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_FAULTED))
+		_a6xx_preemption_fault(adreno_dev);
+
+	mutex_unlock(&device->mutex);
+}
+
+static void _a6xx_preemption_timer(unsigned long data)
+{
+	struct adreno_device *adreno_dev = (struct adreno_device *) data;
+
+	/* We should only be here from a triggered state */
+	if (!adreno_move_preempt_state(adreno_dev,
+		ADRENO_PREEMPT_TRIGGERED, ADRENO_PREEMPT_FAULTED))
+		return;
+
+	/* Schedule the worker to take care of the details */
+	queue_work(system_unbound_wq, &adreno_dev->preempt.work);
+}
+
+/* Find the highest priority active ringbuffer */
+static struct adreno_ringbuffer *a6xx_next_ringbuffer(
+		struct adreno_device *adreno_dev)
+{
+	struct adreno_ringbuffer *rb;
+	unsigned long flags;
+	unsigned int i;
+
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+		bool empty;
+
+		spin_lock_irqsave(&rb->preempt_lock, flags);
+		empty = adreno_rb_empty(rb);
+		spin_unlock_irqrestore(&rb->preempt_lock, flags);
+
+		if (empty == false)
+			return rb;
+	}
+
+	return NULL;
+}
+
+void a6xx_preemption_trigger(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_iommu *iommu = KGSL_IOMMU_PRIV(device);
+	struct adreno_ringbuffer *next;
+	uint64_t ttbr0;
+	unsigned int contextidr;
+	unsigned long flags;
+	uint32_t preempt_level = 0, usesgmem = 1, skipsaverestore = 0;
+
+	/* Put ourselves into a possible trigger state */
+	if (!adreno_move_preempt_state(adreno_dev,
+		ADRENO_PREEMPT_NONE, ADRENO_PREEMPT_START))
+		return;
+
+	/* Get the next ringbuffer to preempt in */
+	next = a6xx_next_ringbuffer(adreno_dev);
+
+	/*
+	 * Nothing to do if every ringbuffer is empty or if the current
+	 * ringbuffer is the only active one
+	 */
+	if (next == NULL || next == adreno_dev->cur_rb) {
+		/*
+		 * Update any critical things that might have been skipped while
+		 * we were looking for a new ringbuffer
+		 */
+
+		if (next != NULL) {
+			_update_wptr(adreno_dev, false);
+
+			mod_timer(&adreno_dev->dispatcher.timer,
+				adreno_dev->cur_rb->dispatch_q.expires);
+		}
+
+		adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
+		return;
+	}
+
+	/* Turn off the dispatcher timer */
+	del_timer(&adreno_dev->dispatcher.timer);
+
+	/*
+	 * This is the most critical section - we need to take care not to race
+	 * until we have programmed the CP for the switch
+	 */
+
+	spin_lock_irqsave(&next->preempt_lock, flags);
+
+	/*
+	 * Get the pagetable from the pagetable info.
+	 * The pagetable_desc is allocated and mapped at probe time, and
+	 * preemption_desc at init time, so no need to check if
+	 * sharedmem accesses to these memdescs succeed.
+	 */
+	kgsl_sharedmem_readq(&next->pagetable_desc, &ttbr0,
+		PT_INFO_OFFSET(ttbr0));
+	kgsl_sharedmem_readl(&next->pagetable_desc, &contextidr,
+		PT_INFO_OFFSET(contextidr));
+
+	kgsl_sharedmem_writel(device, &next->preemption_desc,
+		PREEMPT_RECORD(wptr), next->wptr);
+
+	spin_unlock_irqrestore(&next->preempt_lock, flags);
+
+	/* And write it to the smmu info */
+	kgsl_sharedmem_writeq(device, &iommu->smmu_info,
+		PREEMPT_SMMU_RECORD(ttbr0), ttbr0);
+	kgsl_sharedmem_writel(device, &iommu->smmu_info,
+		PREEMPT_SMMU_RECORD(context_idr), contextidr);
+
+	kgsl_regwrite(device,
+		A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_LO,
+		lower_32_bits(next->preemption_desc.gpuaddr));
+	kgsl_regwrite(device,
+		A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_HI,
+		upper_32_bits(next->preemption_desc.gpuaddr));
+
+	if (next->drawctxt_active) {
+		struct kgsl_context *context = &next->drawctxt_active->base;
+		uint64_t gpuaddr = context->user_ctxt_record->memdesc.gpuaddr;
+
+		kgsl_regwrite(device,
+			A6XX_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_LO,
+			lower_32_bits(gpuaddr));
+		kgsl_regwrite(device,
+			A6XX_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_HI,
+			upper_32_bits(gpuaddr));
+	}
+
+	adreno_dev->next_rb = next;
+
+	/* Start the timer to detect a stuck preemption */
+	mod_timer(&adreno_dev->preempt.timer,
+		jiffies + msecs_to_jiffies(ADRENO_PREEMPT_TIMEOUT));
+
+	trace_adreno_preempt_trigger(adreno_dev->cur_rb, adreno_dev->next_rb);
+
+	adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_TRIGGERED);
+
+	/* Trigger the preemption */
+	adreno_writereg(adreno_dev, ADRENO_REG_CP_PREEMPT,
+			((preempt_level << 6) & 0xC0) |
+			((skipsaverestore << 9) & 0x200) |
+			((usesgmem << 8) & 0x100) | 0x1);
+}
+
+void a6xx_preemption_callback(struct adreno_device *adreno_dev, int bit)
+{
+	unsigned int status;
+
+	if (!adreno_move_preempt_state(adreno_dev,
+		ADRENO_PREEMPT_TRIGGERED, ADRENO_PREEMPT_PENDING))
+		return;
+
+	adreno_readreg(adreno_dev, ADRENO_REG_CP_PREEMPT, &status);
+
+	if (status & 0x1) {
+		KGSL_DRV_ERR(KGSL_DEVICE(adreno_dev),
+			"preempt interrupt with non-zero status: %X\n", status);
+
+		/*
+		 * Under the assumption that this is a race between the
+		 * interrupt and the register, schedule the worker to clean up.
+		 * If the status still hasn't resolved itself by the time we get
+		 * there then we have to assume something bad happened
+		 */
+		adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_COMPLETE);
+		adreno_dispatcher_schedule(KGSL_DEVICE(adreno_dev));
+		return;
+	}
+
+	del_timer(&adreno_dev->preempt.timer);
+
+	trace_adreno_preempt_done(adreno_dev->cur_rb,
+		adreno_dev->next_rb);
+
+	adreno_dev->prev_rb = adreno_dev->cur_rb;
+	adreno_dev->cur_rb = adreno_dev->next_rb;
+	adreno_dev->next_rb = NULL;
+
+	/* Update the wptr if it changed while preemption was ongoing */
+	_update_wptr(adreno_dev, true);
+
+	/* Update the dispatcher timer for the new command queue */
+	mod_timer(&adreno_dev->dispatcher.timer,
+		adreno_dev->cur_rb->dispatch_q.expires);
+
+	adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
+
+	a6xx_preemption_trigger(adreno_dev);
+}
+
+void a6xx_preemption_schedule(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (!adreno_is_preemption_enabled(adreno_dev))
+		return;
+
+	mutex_lock(&device->mutex);
+
+	if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_COMPLETE))
+		_a6xx_preemption_done(adreno_dev);
+
+	a6xx_preemption_trigger(adreno_dev);
+
+	mutex_unlock(&device->mutex);
+}
+
+unsigned int a6xx_preemption_set_marker(unsigned int *cmds, int start)
+{
+	*cmds++ = cp_type7_packet(CP_SET_MARKER, 1);
+
+	/*
+	 * Indicate the beginning  and end of the IB1 list with a SET_MARKER.
+	 * Among other things, this will implicitly enable and disable
+	 * preemption respectively.
+	 */
+	if (start)
+		*cmds++ = 0xD;
+	else
+		*cmds++ = 0xE;
+
+	return 2;
+}
+
+unsigned int a6xx_preemption_pre_ibsubmit(
+		struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb,
+		unsigned int *cmds, struct kgsl_context *context)
+{
+	unsigned int *cmds_orig = cmds;
+
+	if (context)
+		*cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 15);
+	else
+		*cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 12);
+
+	/* NULL SMMU_INFO buffer - we track in KMD */
+	*cmds++ = SET_PSEUDO_REGISTER_SAVE_REGISTER_SMMU_INFO;
+	cmds += cp_gpuaddr(adreno_dev, cmds, 0x0);
+
+	*cmds++ = SET_PSEUDO_REGISTER_SAVE_REGISTER_PRIV_NON_SECURE_SAVE_ADDR;
+	cmds += cp_gpuaddr(adreno_dev, cmds, rb->preemption_desc.gpuaddr);
+
+	*cmds++ = SET_PSEUDO_REGISTER_SAVE_REGISTER_PRIV_SECURE_SAVE_ADDR;
+	cmds += cp_gpuaddr(adreno_dev, cmds, 0);
+
+	if (context) {
+		uint64_t gpuaddr = context->user_ctxt_record->memdesc.gpuaddr;
+
+		*cmds++ = SET_PSEUDO_REGISTER_SAVE_REGISTER_NON_PRIV_SAVE_ADDR;
+		cmds += cp_gpuaddr(adreno_dev, cmds, gpuaddr);
+	}
+
+	/*
+	 * There is no need to specify this address when we are about to
+	 * trigger preemption. This is because CP internally stores this
+	 * address specified here in the CP_SET_PSEUDO_REGISTER payload to
+	 * the context record and thus knows from where to restore
+	 * the saved perfcounters for the new ringbuffer.
+	 */
+	*cmds++ = SET_PSEUDO_REGISTER_SAVE_REGISTER_COUNTER;
+	cmds += cp_gpuaddr(adreno_dev, cmds,
+			rb->perfcounter_save_restore_desc.gpuaddr);
+
+	return (unsigned int) (cmds - cmds_orig);
+}
+
+unsigned int a6xx_preemption_post_ibsubmit(struct adreno_device *adreno_dev,
+	unsigned int *cmds)
+{
+	unsigned int *cmds_orig = cmds;
+
+	*cmds++ = cp_type7_packet(CP_CONTEXT_SWITCH_YIELD, 4);
+	cmds += cp_gpuaddr(adreno_dev, cmds, 0x0);
+	*cmds++ = 1;
+	*cmds++ = 0;
+
+	return (unsigned int) (cmds - cmds_orig);
+}
+
+void a6xx_preemption_start(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_iommu *iommu = KGSL_IOMMU_PRIV(device);
+	struct adreno_ringbuffer *rb;
+	unsigned int i;
+
+	if (!adreno_is_preemption_enabled(adreno_dev))
+		return;
+
+	/* Force the state to be clear */
+	adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
+
+	/* smmu_info is allocated and mapped in a6xx_preemption_iommu_init */
+	kgsl_sharedmem_writel(device, &iommu->smmu_info,
+		PREEMPT_SMMU_RECORD(magic), A6XX_CP_SMMU_INFO_MAGIC_REF);
+	kgsl_sharedmem_writeq(device, &iommu->smmu_info,
+		PREEMPT_SMMU_RECORD(ttbr0), MMU_DEFAULT_TTBR0(device));
+
+	/* The CP doesn't use the asid record, so poison it */
+	kgsl_sharedmem_writel(device, &iommu->smmu_info,
+		PREEMPT_SMMU_RECORD(asid), 0xDECAFBAD);
+	kgsl_sharedmem_writel(device, &iommu->smmu_info,
+		PREEMPT_SMMU_RECORD(context_idr),
+		MMU_DEFAULT_CONTEXTIDR(device));
+
+	adreno_writereg64(adreno_dev,
+		ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_LO,
+		ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_HI,
+		iommu->smmu_info.gpuaddr);
+
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+		/*
+		 * preemption_desc is allocated and mapped at init time,
+		 * so no need to check sharedmem_writel return value
+		 */
+		kgsl_sharedmem_writel(device, &rb->preemption_desc,
+			PREEMPT_RECORD(rptr), 0);
+		kgsl_sharedmem_writel(device, &rb->preemption_desc,
+			PREEMPT_RECORD(wptr), 0);
+
+		adreno_ringbuffer_set_pagetable(rb,
+			device->mmu.defaultpagetable);
+	}
+}
+
+static int a6xx_preemption_ringbuffer_init(struct adreno_device *adreno_dev,
+	struct adreno_ringbuffer *rb, uint64_t counteraddr)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	ret = kgsl_allocate_global(device, &rb->preemption_desc,
+		A6XX_CP_CTXRECORD_SIZE_IN_BYTES, 0, KGSL_MEMDESC_PRIVILEGED,
+		"preemption_desc");
+	if (ret)
+		return ret;
+
+	ret = kgsl_allocate_global(device, &rb->perfcounter_save_restore_desc,
+		A6XX_CP_PERFCOUNTER_SAVE_RESTORE_SIZE, 0,
+		KGSL_MEMDESC_PRIVILEGED, "perfcounter_save_restore_desc");
+	if (ret)
+		return ret;
+
+	kgsl_sharedmem_writel(device, &rb->preemption_desc,
+		PREEMPT_RECORD(magic), A6XX_CP_CTXRECORD_MAGIC_REF);
+	kgsl_sharedmem_writel(device, &rb->preemption_desc,
+		PREEMPT_RECORD(info), 0);
+	kgsl_sharedmem_writel(device, &rb->preemption_desc,
+		PREEMPT_RECORD(data), 0);
+	kgsl_sharedmem_writel(device, &rb->preemption_desc,
+		PREEMPT_RECORD(cntl), A6XX_CP_RB_CNTL_DEFAULT);
+	kgsl_sharedmem_writel(device, &rb->preemption_desc,
+		PREEMPT_RECORD(rptr), 0);
+	kgsl_sharedmem_writel(device, &rb->preemption_desc,
+		PREEMPT_RECORD(wptr), 0);
+	kgsl_sharedmem_writeq(device, &rb->preemption_desc,
+		PREEMPT_RECORD(rptr_addr), SCRATCH_RPTR_GPU_ADDR(device,
+		rb->id));
+	kgsl_sharedmem_writeq(device, &rb->preemption_desc,
+		PREEMPT_RECORD(rbase), rb->buffer_desc.gpuaddr);
+	kgsl_sharedmem_writeq(device, &rb->preemption_desc,
+		PREEMPT_RECORD(counter), counteraddr);
+
+	return 0;
+}
+
+#ifdef CONFIG_QCOM_KGSL_IOMMU
+static int a6xx_preemption_iommu_init(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_iommu *iommu = KGSL_IOMMU_PRIV(device);
+
+	/* Allocate mem for storing preemption smmu record */
+	return kgsl_allocate_global(device, &iommu->smmu_info, PAGE_SIZE,
+		KGSL_MEMFLAGS_GPUREADONLY, KGSL_MEMDESC_PRIVILEGED,
+		"smmu_info");
+}
+
+static void a6xx_preemption_iommu_close(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_iommu *iommu = KGSL_IOMMU_PRIV(device);
+
+	kgsl_free_global(device, &iommu->smmu_info);
+}
+#else
+static int a6xx_preemption_iommu_init(struct adreno_device *adreno_dev)
+{
+	return -ENODEV;
+}
+
+static void a6xx_preemption_iommu_close(struct adreno_device *adreno_dev)
+{
+}
+#endif
+
+static void a6xx_preemption_close(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct adreno_preemption *preempt = &adreno_dev->preempt;
+	struct adreno_ringbuffer *rb;
+	unsigned int i;
+
+	del_timer(&preempt->timer);
+	kgsl_free_global(device, &preempt->counters);
+	a6xx_preemption_iommu_close(adreno_dev);
+
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+		kgsl_free_global(device, &rb->preemption_desc);
+		kgsl_free_global(device, &rb->perfcounter_save_restore_desc);
+	}
+}
+
+int a6xx_preemption_init(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_preemption *preempt = &adreno_dev->preempt;
+	struct adreno_ringbuffer *rb;
+	int ret;
+	unsigned int i;
+	uint64_t addr;
+
+	/* We are dependent on IOMMU to make preemption go on the CP side */
+	if (kgsl_mmu_get_mmutype(device) != KGSL_MMU_TYPE_IOMMU)
+		return -ENODEV;
+
+	INIT_WORK(&preempt->work, _a6xx_preemption_worker);
+
+	setup_timer(&preempt->timer, _a6xx_preemption_timer,
+		(unsigned long) adreno_dev);
+
+	/* Allocate mem for storing preemption counters */
+	ret = kgsl_allocate_global(device, &preempt->counters,
+		adreno_dev->num_ringbuffers *
+		A6XX_CP_CTXRECORD_PREEMPTION_COUNTER_SIZE, 0, 0,
+		"preemption_counters");
+	if (ret)
+		goto err;
+
+	addr = preempt->counters.gpuaddr;
+
+	/* Allocate mem for storing preemption switch record */
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+		ret = a6xx_preemption_ringbuffer_init(adreno_dev, rb, addr);
+		if (ret)
+			goto err;
+
+		addr += A6XX_CP_CTXRECORD_PREEMPTION_COUNTER_SIZE;
+	}
+
+	ret = a6xx_preemption_iommu_init(adreno_dev);
+
+err:
+	if (ret)
+		a6xx_preemption_close(device);
+
+	return ret;
+}
+
+void a6xx_preemption_context_destroy(struct kgsl_context *context)
+{
+	struct kgsl_device *device = context->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	if (!adreno_is_preemption_enabled(adreno_dev))
+		return;
+
+	gpumem_free_entry(context->user_ctxt_record);
+}
+
+int a6xx_preemption_context_init(struct kgsl_context *context)
+{
+	struct kgsl_device *device = context->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	if (!adreno_is_preemption_enabled(adreno_dev))
+		return 0;
+
+	context->user_ctxt_record = gpumem_alloc_entry(context->dev_priv,
+			A6XX_CP_CTXRECORD_USER_RESTORE_SIZE, 0);
+	if (IS_ERR(context->user_ctxt_record)) {
+		int ret = PTR_ERR(context->user_ctxt_record);
+
+		context->user_ctxt_record = NULL;
+		return ret;
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/msm/adreno_drawctxt.c b/drivers/gpu/msm/adreno_drawctxt.c
index f217822..c6df7bb 100644
--- a/drivers/gpu/msm/adreno_drawctxt.c
+++ b/drivers/gpu/msm/adreno_drawctxt.c
@@ -341,6 +341,7 @@
 	struct adreno_context *drawctxt;
 	struct kgsl_device *device = dev_priv->device;
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
 	int ret;
 	unsigned int local;
 
@@ -421,6 +422,16 @@
 		return ERR_PTR(ret);
 	}
 
+	if (gpudev->preemption_context_init) {
+		ret = gpudev->preemption_context_init(&drawctxt->base);
+		if (ret != 0) {
+			kgsl_context_detach(&drawctxt->base);
+			kgsl_context_put(&drawctxt->base);
+			kfree(drawctxt);
+			return ERR_PTR(ret);
+		}
+	}
+
 	kgsl_sharedmem_writel(device, &device->memstore,
 			KGSL_MEMSTORE_OFFSET(drawctxt->base.id, soptimestamp),
 			0);
@@ -545,10 +556,18 @@
 void adreno_drawctxt_destroy(struct kgsl_context *context)
 {
 	struct adreno_context *drawctxt;
+	struct adreno_device *adreno_dev;
+	struct adreno_gpudev *gpudev;
 
 	if (context == NULL)
 		return;
 
+	adreno_dev = ADRENO_DEVICE(context->device);
+	gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+
+	if (gpudev->preemption_context_destroy)
+		gpudev->preemption_context_destroy(context);
+
 	drawctxt = ADRENO_CONTEXT(context);
 	debugfs_remove_recursive(drawctxt->debug_root);
 	kfree(drawctxt);
diff --git a/drivers/gpu/msm/adreno_pm4types.h b/drivers/gpu/msm/adreno_pm4types.h
index fceceda..2a330b4 100644
--- a/drivers/gpu/msm/adreno_pm4types.h
+++ b/drivers/gpu/msm/adreno_pm4types.h
@@ -55,6 +55,12 @@
 /* switches SMMU pagetable, used on a5xx only */
 #define CP_SMMU_TABLE_UPDATE 0x53
 
+/*  Set internal CP registers, used to indicate context save data addresses */
+#define CP_SET_PSEUDO_REGISTER      0x56
+
+/* Tell CP the current operation mode, indicates save and restore procedure */
+#define CP_SET_MARKER  0x65
+
 /* register read/modify/write */
 #define CP_REG_RMW		0x21
 
diff --git a/drivers/gpu/msm/adreno_ringbuffer.c b/drivers/gpu/msm/adreno_ringbuffer.c
index bff1fda..15c68fb 100644
--- a/drivers/gpu/msm/adreno_ringbuffer.c
+++ b/drivers/gpu/msm/adreno_ringbuffer.c
@@ -864,9 +864,12 @@
 			dwords += 2;
 	}
 
-	if (gpudev->preemption_yield_enable &&
-				adreno_is_preemption_enabled(adreno_dev))
-		dwords += 8;
+	if (adreno_is_preemption_enabled(adreno_dev)) {
+		if (gpudev->preemption_set_marker)
+			dwords += 4;
+		else if (gpudev->preemption_yield_enable)
+			dwords += 8;
+	}
 
 	link = kcalloc(dwords, sizeof(unsigned int), GFP_KERNEL);
 	if (!link) {
@@ -897,6 +900,10 @@
 			gpu_ticks_submitted));
 	}
 
+	if (gpudev->preemption_set_marker &&
+			adreno_is_preemption_enabled(adreno_dev))
+		cmds += gpudev->preemption_set_marker(cmds, 1);
+
 	if (numibs) {
 		list_for_each_entry(ib, &cmdobj->cmdlist, node) {
 			/*
@@ -918,9 +925,12 @@
 		}
 	}
 
-	if (gpudev->preemption_yield_enable &&
-				adreno_is_preemption_enabled(adreno_dev))
-		cmds += gpudev->preemption_yield_enable(cmds);
+	if (adreno_is_preemption_enabled(adreno_dev)) {
+		if (gpudev->preemption_set_marker)
+			cmds += gpudev->preemption_set_marker(cmds, 0);
+		else if (gpudev->preemption_yield_enable)
+			cmds += gpudev->preemption_yield_enable(cmds);
+	}
 
 	if (kernel_profiling) {
 		cmds += _get_alwayson_counter(adreno_dev, cmds,
diff --git a/drivers/gpu/msm/adreno_ringbuffer.h b/drivers/gpu/msm/adreno_ringbuffer.h
index 63374af..72fc5bf3 100644
--- a/drivers/gpu/msm/adreno_ringbuffer.h
+++ b/drivers/gpu/msm/adreno_ringbuffer.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2016, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2007-2017, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -92,6 +92,8 @@
  * @drawctxt_active: The last pagetable that this ringbuffer is set to
  * @preemption_desc: The memory descriptor containing
  * preemption info written/read by CP
+ * @perfcounter_save_restore_desc: Used by CP to save/restore the perfcounter
+ * values across preemption
  * @pagetable_desc: Memory to hold information about the pagetables being used
  * and the commands to switch pagetable on the RB
  * @dispatch_q: The dispatcher side queue for this ringbuffer
@@ -118,6 +120,7 @@
 	struct kgsl_event_group events;
 	struct adreno_context *drawctxt_active;
 	struct kgsl_memdesc preemption_desc;
+	struct kgsl_memdesc perfcounter_save_restore_desc;
 	struct kgsl_memdesc pagetable_desc;
 	struct adreno_dispatcher_drawqueue dispatch_q;
 	wait_queue_head_t ts_expire_waitq;
diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c
index 6bd212d..7b8cdc2 100644
--- a/drivers/gpu/msm/kgsl.c
+++ b/drivers/gpu/msm/kgsl.c
@@ -245,8 +245,6 @@
 }
 EXPORT_SYMBOL(kgsl_readtimestamp);
 
-static long gpumem_free_entry(struct kgsl_mem_entry *entry);
-
 /* Scheduled by kgsl_mem_entry_put_deferred() */
 static void _deferred_put(struct work_struct *work)
 {
@@ -608,7 +606,7 @@
  * detached by checking the KGSL_CONTEXT_PRIV_DETACHED bit in
  * context->priv.
  */
-static void kgsl_context_detach(struct kgsl_context *context)
+void kgsl_context_detach(struct kgsl_context *context)
 {
 	struct kgsl_device *device;
 
@@ -1812,7 +1810,7 @@
 	return 0;
 }
 
-static long gpumem_free_entry(struct kgsl_mem_entry *entry)
+long gpumem_free_entry(struct kgsl_mem_entry *entry)
 {
 	pid_t ptname = 0;
 
@@ -3054,7 +3052,7 @@
 /* The largest allowable alignment for a GPU object is 32MB */
 #define KGSL_MAX_ALIGN (32 * SZ_1M)
 
-static struct kgsl_mem_entry *gpumem_alloc_entry(
+struct kgsl_mem_entry *gpumem_alloc_entry(
 		struct kgsl_device_private *dev_priv,
 		uint64_t size, uint64_t flags)
 {
diff --git a/drivers/gpu/msm/kgsl.h b/drivers/gpu/msm/kgsl.h
index 3f1c86e..c54e51e 100644
--- a/drivers/gpu/msm/kgsl.h
+++ b/drivers/gpu/msm/kgsl.h
@@ -445,6 +445,10 @@
 int kgsl_suspend_driver(struct platform_device *pdev, pm_message_t state);
 int kgsl_resume_driver(struct platform_device *pdev);
 
+struct kgsl_mem_entry *gpumem_alloc_entry(struct kgsl_device_private *dev_priv,
+				uint64_t size, uint64_t flags);
+long gpumem_free_entry(struct kgsl_mem_entry *entry);
+
 static inline int kgsl_gpuaddr_in_memdesc(const struct kgsl_memdesc *memdesc,
 				uint64_t gpuaddr, uint64_t size)
 {
diff --git a/drivers/gpu/msm/kgsl_device.h b/drivers/gpu/msm/kgsl_device.h
index ca1f181..b621ada 100644
--- a/drivers/gpu/msm/kgsl_device.h
+++ b/drivers/gpu/msm/kgsl_device.h
@@ -378,6 +378,8 @@
  * @pwr_constraint: power constraint from userspace for this context
  * @fault_count: number of times gpu hanged in last _context_throttle_time ms
  * @fault_time: time of the first gpu hang in last _context_throttle_time ms
+ * @user_ctxt_record: memory descriptor used by CP to save/restore VPC data
+ * across preemption
  */
 struct kgsl_context {
 	struct kref refcount;
@@ -395,6 +397,7 @@
 	struct kgsl_pwr_constraint pwr_constraint;
 	unsigned int fault_count;
 	unsigned long fault_time;
+	struct kgsl_mem_entry *user_ctxt_record;
 };
 
 #define _context_comm(_c) \
@@ -689,6 +692,8 @@
 void kgsl_events_init(void);
 void kgsl_events_exit(void);
 
+void kgsl_context_detach(struct kgsl_context *context);
+
 void kgsl_del_event_group(struct kgsl_event_group *group);
 
 void kgsl_add_event_group(struct kgsl_event_group *group,
diff --git a/drivers/gpu/msm/kgsl_iommu.h b/drivers/gpu/msm/kgsl_iommu.h
index 6337a48..acf8ae4 100644
--- a/drivers/gpu/msm/kgsl_iommu.h
+++ b/drivers/gpu/msm/kgsl_iommu.h
@@ -23,7 +23,7 @@
  * These defines control the address range for allocations that
  * are mapped into all pagetables.
  */
-#define KGSL_IOMMU_GLOBAL_MEM_SIZE	SZ_8M
+#define KGSL_IOMMU_GLOBAL_MEM_SIZE	(20 * SZ_1M)
 #define KGSL_IOMMU_GLOBAL_MEM_BASE	0xf8000000
 
 #define KGSL_IOMMU_SECURE_SIZE SZ_256M