msm: kgsl: Add a per-context preemption buffer

The CP uses this buffer to save the VPC data of the outgoing context
and to restore it for the incoming context on a preemption switch.
Allocate and map the buffer during context creation, then specify its
GPU address in the preemption packets and in the CP context switch
restore address registers.
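
For illustration only (not part of the change): each entry in the
CP_SET_PSEUDO_REGISTER packet takes three dwords, the pseudo register
selector followed by the two halves of the 64-bit GPU address, which
is why the packet grows from 12 to 15 dwords when a user context
record is attached. A minimal sketch of the added entry, assuming an
a6xx target where cp_gpuaddr() emits the low dword then the high
dword:

    /* sketch: one CP_SET_PSEUDO_REGISTER entry for the VPC buffer */
    *cmds++ = SET_PSEUDO_REGISTER_SAVE_REGISTER_NON_PRIV_SAVE_ADDR;
    *cmds++ = lower_32_bits(gpuaddr);  /* bits [31:0] of the record */
    *cmds++ = upper_32_bits(gpuaddr);  /* bits [63:32] of the record */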

Change-Id: I3bb73322848e2f19f1f8e511fa5c303e57898cc8
Signed-off-by: Harshdeep Dhatt <hdhatt@codeaurora.org>
diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h
index b3b4ccb..c7e3ad7 100644
--- a/drivers/gpu/msm/adreno.h
+++ b/drivers/gpu/msm/adreno.h
@@ -851,6 +851,8 @@
 				unsigned int *cmds);
 	int (*preemption_init)(struct adreno_device *);
 	void (*preemption_schedule)(struct adreno_device *);
+	int (*preemption_context_init)(struct kgsl_context *);
+	void (*preemption_context_destroy)(struct kgsl_context *);
 	void (*enable_64bit)(struct adreno_device *);
 	void (*clk_set_options)(struct adreno_device *,
 				const char *, struct clk *, bool on);
diff --git a/drivers/gpu/msm/adreno_a6xx.c b/drivers/gpu/msm/adreno_a6xx.c
index 2c46b93..ad0ce44 100644
--- a/drivers/gpu/msm/adreno_a6xx.c
+++ b/drivers/gpu/msm/adreno_a6xx.c
@@ -2774,4 +2774,6 @@
 	.preemption_init = a6xx_preemption_init,
 	.preemption_schedule = a6xx_preemption_schedule,
 	.preemption_set_marker = a6xx_preemption_set_marker,
+	.preemption_context_init = a6xx_preemption_context_init,
+	.preemption_context_destroy = a6xx_preemption_context_destroy,
 };
diff --git a/drivers/gpu/msm/adreno_a6xx.h b/drivers/gpu/msm/adreno_a6xx.h
index 2738238..ddf89d6 100644
--- a/drivers/gpu/msm/adreno_a6xx.h
+++ b/drivers/gpu/msm/adreno_a6xx.h
@@ -80,6 +80,8 @@
 #define A6XX_CP_CTXRECORD_SIZE_IN_BYTES     (2112 * 1024)
 /* Size of the preemption counter block (in bytes) */
 #define A6XX_CP_CTXRECORD_PREEMPTION_COUNTER_SIZE   (16 * 4)
+/* Size of the user context record block (in bytes) */
+#define A6XX_CP_CTXRECORD_USER_RESTORE_SIZE (192 * 1024)
 /* Size of the performance counter save/restore block (in bytes) */
 #define A6XX_CP_PERFCOUNTER_SAVE_RESTORE_SIZE   (4 * 1024)
 
@@ -102,6 +104,10 @@
 
 void a6xx_preemption_callback(struct adreno_device *adreno_dev, int bit);
 
+int a6xx_preemption_context_init(struct kgsl_context *context);
+
+void a6xx_preemption_context_destroy(struct kgsl_context *context);
+
 void a6xx_snapshot(struct adreno_device *adreno_dev,
 		struct kgsl_snapshot *snapshot);
 
diff --git a/drivers/gpu/msm/adreno_a6xx_preempt.c b/drivers/gpu/msm/adreno_a6xx_preempt.c
index c37791a..00325e5 100644
--- a/drivers/gpu/msm/adreno_a6xx_preempt.c
+++ b/drivers/gpu/msm/adreno_a6xx_preempt.c
@@ -277,6 +277,18 @@
 		A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR_HI,
 		upper_32_bits(next->preemption_desc.gpuaddr));
 
+	if (next->drawctxt_active) {
+		struct kgsl_context *context = &next->drawctxt_active->base;
+		uint64_t gpuaddr = context->user_ctxt_record->memdesc.gpuaddr;
+
+		kgsl_regwrite(device,
+			A6XX_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_LO,
+			lower_32_bits(gpuaddr));
+		kgsl_regwrite(device,
+			A6XX_CP_CONTEXT_SWITCH_NON_PRIV_RESTORE_ADDR_HI,
+			upper_32_bits(gpuaddr));
+	}
+
 	adreno_dev->next_rb = next;
 
 	/* Start the timer to detect a stuck preemption */
@@ -381,7 +393,10 @@
 {
 	unsigned int *cmds_orig = cmds;
 
-	*cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 12);
+	if (context)
+		*cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 15);
+	else
+		*cmds++ = cp_type7_packet(CP_SET_PSEUDO_REGISTER, 12);
 
 	/* NULL SMMU_INFO buffer - we track in KMD */
 	*cmds++ = SET_PSEUDO_REGISTER_SAVE_REGISTER_SMMU_INFO;
@@ -393,6 +408,12 @@
 	*cmds++ = SET_PSEUDO_REGISTER_SAVE_REGISTER_PRIV_SECURE_SAVE_ADDR;
 	cmds += cp_gpuaddr(adreno_dev, cmds, 0);
 
+	if (context) {
+		uint64_t gpuaddr = context->user_ctxt_record->memdesc.gpuaddr;
+
+		*cmds++ = SET_PSEUDO_REGISTER_SAVE_REGISTER_NON_PRIV_SAVE_ADDR;
+		cmds += cp_gpuaddr(adreno_dev, cmds, gpuaddr);
+	}
 
 	/*
 	 * There is no need to specify this address when we are about to
@@ -600,3 +621,34 @@
 
 	return ret;
 }
+
+void a6xx_preemption_context_destroy(struct kgsl_context *context)
+{
+	struct kgsl_device *device = context->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	if (!adreno_is_preemption_enabled(adreno_dev))
+		return;
+
+	gpumem_free_entry(context->user_ctxt_record);
+}
+
+int a6xx_preemption_context_init(struct kgsl_context *context)
+{
+	struct kgsl_device *device = context->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	if (!adreno_is_preemption_enabled(adreno_dev))
+		return 0;
+
+	context->user_ctxt_record = gpumem_alloc_entry(context->dev_priv,
+			A6XX_CP_CTXRECORD_USER_RESTORE_SIZE, 0);
+	if (IS_ERR(context->user_ctxt_record)) {
+		int ret = PTR_ERR(context->user_ctxt_record);
+
+		context->user_ctxt_record = NULL;
+		return ret;
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/msm/adreno_drawctxt.c b/drivers/gpu/msm/adreno_drawctxt.c
index f217822..c6df7bb 100644
--- a/drivers/gpu/msm/adreno_drawctxt.c
+++ b/drivers/gpu/msm/adreno_drawctxt.c
@@ -341,6 +341,7 @@
 	struct adreno_context *drawctxt;
 	struct kgsl_device *device = dev_priv->device;
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
 	int ret;
 	unsigned int local;
 
@@ -421,6 +422,16 @@
 		return ERR_PTR(ret);
 	}
 
+	if (gpudev->preemption_context_init) {
+		ret = gpudev->preemption_context_init(&drawctxt->base);
+		if (ret != 0) {
+			kgsl_context_detach(&drawctxt->base);
+			kgsl_context_put(&drawctxt->base);
+			kfree(drawctxt);
+			return ERR_PTR(ret);
+		}
+	}
+
 	kgsl_sharedmem_writel(device, &device->memstore,
 			KGSL_MEMSTORE_OFFSET(drawctxt->base.id, soptimestamp),
 			0);
@@ -545,10 +556,18 @@
 void adreno_drawctxt_destroy(struct kgsl_context *context)
 {
 	struct adreno_context *drawctxt;
+	struct adreno_device *adreno_dev;
+	struct adreno_gpudev *gpudev;
 
 	if (context == NULL)
 		return;
 
+	adreno_dev = ADRENO_DEVICE(context->device);
+	gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+
+	if (gpudev->preemption_context_destroy)
+		gpudev->preemption_context_destroy(context);
+
 	drawctxt = ADRENO_CONTEXT(context);
 	debugfs_remove_recursive(drawctxt->debug_root);
 	kfree(drawctxt);
diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c
index 6bd212d..7b8cdc2 100644
--- a/drivers/gpu/msm/kgsl.c
+++ b/drivers/gpu/msm/kgsl.c
@@ -245,8 +245,6 @@
 }
 EXPORT_SYMBOL(kgsl_readtimestamp);
 
-static long gpumem_free_entry(struct kgsl_mem_entry *entry);
-
 /* Scheduled by kgsl_mem_entry_put_deferred() */
 static void _deferred_put(struct work_struct *work)
 {
@@ -608,7 +606,7 @@
  * detached by checking the KGSL_CONTEXT_PRIV_DETACHED bit in
  * context->priv.
  */
-static void kgsl_context_detach(struct kgsl_context *context)
+void kgsl_context_detach(struct kgsl_context *context)
 {
 	struct kgsl_device *device;
 
@@ -1812,7 +1810,7 @@
 	return 0;
 }
 
-static long gpumem_free_entry(struct kgsl_mem_entry *entry)
+long gpumem_free_entry(struct kgsl_mem_entry *entry)
 {
 	pid_t ptname = 0;
 
@@ -3054,7 +3052,7 @@
 /* The largest allowable alignment for a GPU object is 32MB */
 #define KGSL_MAX_ALIGN (32 * SZ_1M)
 
-static struct kgsl_mem_entry *gpumem_alloc_entry(
+struct kgsl_mem_entry *gpumem_alloc_entry(
 		struct kgsl_device_private *dev_priv,
 		uint64_t size, uint64_t flags)
 {
diff --git a/drivers/gpu/msm/kgsl.h b/drivers/gpu/msm/kgsl.h
index 3f1c86e..c54e51e 100644
--- a/drivers/gpu/msm/kgsl.h
+++ b/drivers/gpu/msm/kgsl.h
@@ -445,6 +445,10 @@
 int kgsl_suspend_driver(struct platform_device *pdev, pm_message_t state);
 int kgsl_resume_driver(struct platform_device *pdev);
 
+struct kgsl_mem_entry *gpumem_alloc_entry(struct kgsl_device_private *dev_priv,
+				uint64_t size, uint64_t flags);
+long gpumem_free_entry(struct kgsl_mem_entry *entry);
+
 static inline int kgsl_gpuaddr_in_memdesc(const struct kgsl_memdesc *memdesc,
 				uint64_t gpuaddr, uint64_t size)
 {
diff --git a/drivers/gpu/msm/kgsl_device.h b/drivers/gpu/msm/kgsl_device.h
index ca1f181..b621ada 100644
--- a/drivers/gpu/msm/kgsl_device.h
+++ b/drivers/gpu/msm/kgsl_device.h
@@ -378,6 +378,8 @@
  * @pwr_constraint: power constraint from userspace for this context
  * @fault_count: number of times gpu hanged in last _context_throttle_time ms
  * @fault_time: time of the first gpu hang in last _context_throttle_time ms
+ * @user_ctxt_record: memory descriptor used by CP to save/restore VPC data
+ * across preemption
  */
 struct kgsl_context {
 	struct kref refcount;
@@ -395,6 +397,7 @@
 	struct kgsl_pwr_constraint pwr_constraint;
 	unsigned int fault_count;
 	unsigned long fault_time;
+	struct kgsl_mem_entry *user_ctxt_record;
 };
 
 #define _context_comm(_c) \
@@ -689,6 +692,8 @@
 void kgsl_events_init(void);
 void kgsl_events_exit(void);
 
+void kgsl_context_detach(struct kgsl_context *context);
+
 void kgsl_del_event_group(struct kgsl_event_group *group);
 
 void kgsl_add_event_group(struct kgsl_event_group *group,