msm: kgsl: Add support for GPU System Cache usage

Activate and deactivate the GPU slice in the system
cache upon GPU power collapse/restore. Configure the
SCIDs for the slices used by the GPU. Currently, the
GPU uses one slice for all its blocks.
Also enable the right memory attributes for a buffer
to be allocated into the system cache. The memory
attributes used are outer-cacheable, read-allocate,
write-no-allocate.

CRs-Fixed: 1081617
Change-Id: I54d5ddce8056c0ef491a21bb19ce9fd9e30a0540
Signed-off-by: Sushmita Susheelendra <ssusheel@codeaurora.org>
diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c
index 96b01e9..3c4c76d 100644
--- a/drivers/gpu/msm/adreno.c
+++ b/drivers/gpu/msm/adreno.c
@@ -27,6 +27,7 @@
 #include "kgsl_sharedmem.h"
 #include "kgsl_iommu.h"
 #include "kgsl_trace.h"
+#include "adreno_llc.h"
 
 #include "adreno.h"
 #include "adreno_iommu.h"
@@ -1026,6 +1027,15 @@
 	/* Initialize coresight for the target */
 	adreno_coresight_init(adreno_dev);
 
+	/* Get the system cache slice descriptor for GPU */
+	adreno_dev->gpu_llc_slice = adreno_llc_getd(&pdev->dev, "gpu");
+	if (IS_ERR(adreno_dev->gpu_llc_slice)) {
+		KGSL_DRV_WARN(device,
+			"Failed to get GPU LLC slice descriptor (%ld)\n",
+			PTR_ERR(adreno_dev->gpu_llc_slice));
+		adreno_dev->gpu_llc_slice = NULL;
+	}
+
 	adreno_input_handler.private = device;
 
 #ifdef CONFIG_INPUT
@@ -1095,6 +1105,10 @@
 	adreno_coresight_remove(adreno_dev);
 	adreno_profile_close(adreno_dev);
 
+	/* Release the system cache slice descriptor */
+	if (adreno_dev->gpu_llc_slice)
+		adreno_llc_putd(adreno_dev->gpu_llc_slice);
+
 	kgsl_pwrscale_close(device);
 
 	adreno_dispatcher_close(adreno_dev);
@@ -1419,6 +1433,14 @@
 	/* Start the GPU */
 	gpudev->start(adreno_dev);
 
+	/*
+	 * The system cache control registers
+	 * live on the CX rail. Hence need
+	 * reprogramming every time the GPU
+	 * comes out of power collapse.
+	 */
+	adreno_llc_setup(device);
+
 	/* Re-initialize the coresight registers if applicable */
 	adreno_coresight_start(adreno_dev);
 
@@ -1553,6 +1575,9 @@
 
 	adreno_ocmem_free(adreno_dev);
 
+	if (adreno_dev->gpu_llc_slice)
+		adreno_llc_deactivate_slice(adreno_dev->gpu_llc_slice);
+
 	/* Save active coresight registers if applicable */
 	adreno_coresight_stop(adreno_dev);
 
diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h
index 9456a4f..4fe7285 100644
--- a/drivers/gpu/msm/adreno.h
+++ b/drivers/gpu/msm/adreno.h
@@ -396,6 +396,7 @@
  * @irq_storm_work: Worker to handle possible interrupt storms
  * @active_list: List to track active contexts
  * @active_list_lock: Lock to protect active_list
+ * @gpu_llc_slice: GPU system cache slice descriptor
  */
 struct adreno_device {
 	struct kgsl_device dev;    /* Must be first field in this struct */
@@ -454,6 +455,8 @@
 
 	struct list_head active_list;
 	spinlock_t active_list_lock;
+
+	void *gpu_llc_slice;
 };
 
 /**
@@ -809,6 +812,8 @@
 	void (*enable_64bit)(struct adreno_device *);
 	void (*clk_set_options)(struct adreno_device *,
 				const char *, struct clk *);
+	void (*llc_configure_gpu_scid)(struct adreno_device *adreno_dev);
+	void (*llc_enable_overrides)(struct adreno_device *adreno_dev);
 };
 
 /**
diff --git a/drivers/gpu/msm/adreno_a6xx.c b/drivers/gpu/msm/adreno_a6xx.c
index c5c2f77..8e20806 100644
--- a/drivers/gpu/msm/adreno_a6xx.c
+++ b/drivers/gpu/msm/adreno_a6xx.c
@@ -20,6 +20,7 @@
 #include "adreno_pm4types.h"
 #include "adreno_perfcounter.h"
 #include "adreno_ringbuffer.h"
+#include "adreno_llc.h"
 #include "kgsl_sharedmem.h"
 #include "kgsl_log.h"
 #include "kgsl.h"
@@ -30,6 +31,13 @@
 
 #define MIN_HBB		13
 
+#define A6XX_LLC_NUM_GPU_SCIDS		5
+#define A6XX_GPU_LLC_SCID_NUM_BITS	5
+#define A6XX_GPU_LLC_SCID_MASK \
+	((1 << (A6XX_LLC_NUM_GPU_SCIDS * A6XX_GPU_LLC_SCID_NUM_BITS)) - 1)
+#define A6XX_GPU_CX_REG_BASE		0x509E000
+#define A6XX_GPU_CX_REG_SIZE		0x1000
+
 static const struct adreno_vbif_data a630_vbif[] = {
 	{A6XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009},
 	{A6XX_RBBM_VBIF_CLIENT_QOS_CNTL, 0x3},
@@ -580,6 +588,68 @@
 	}
 }
 
+/* GPU System Cache control registers */
+#define A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_0   0x4
+#define A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_1   0x8
+
+/* Clear the @mask bits at @regaddr, then OR in @bits (read-modify-write) */
+static inline void _reg_rmw(void __iomem *regaddr,
+	unsigned int mask, unsigned int bits)
+{
+	unsigned int val = __raw_readl(regaddr);
+
+	/* Make sure the above read completes before we proceed */
+	rmb();
+	val &= ~mask;
+	__raw_writel(val | bits, regaddr);
+	/* Make sure the above write posts before we proceed */
+	wmb();
+}
+
+
+/*
+ * a6xx_llc_configure_gpu_scid() - Program the sub-cache ID for all GPU blocks
+ * @adreno_dev: The adreno device pointer
+ */
+static void a6xx_llc_configure_gpu_scid(struct adreno_device *adreno_dev)
+{
+	void __iomem *cx = ioremap(A6XX_GPU_CX_REG_BASE, A6XX_GPU_CX_REG_SIZE);
+	uint32_t gpu_scid = adreno_llc_get_scid(adreno_dev->gpu_llc_slice);
+	uint32_t gpu_cntl1_val = 0;
+	int i;
+
+	/* ioremap() can fail; skip programming rather than write via NULL */
+	if (!cx)
+		return;
+	for (i = 0; i < A6XX_LLC_NUM_GPU_SCIDS; i++)
+		gpu_cntl1_val = (gpu_cntl1_val << A6XX_GPU_LLC_SCID_NUM_BITS)
+			| gpu_scid;
+	_reg_rmw(cx + A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_1,
+			A6XX_GPU_LLC_SCID_MASK, gpu_cntl1_val);
+	iounmap(cx);
+}
+
+/*
+ * a6xx_llc_enable_overrides() - Override the page attributes
+ * @adreno_dev: The adreno device pointer
+ */
+static void a6xx_llc_enable_overrides(struct adreno_device *adreno_dev)
+{
+	void __iomem *cx = ioremap(A6XX_GPU_CX_REG_BASE, A6XX_GPU_CX_REG_SIZE);
+
+	if (!cx)
+		return;
+	/*
+	 * 0x3: readnoallocoverrideen=0, read-no-alloc=0 - Allocate lines on
+	 *      read miss; writenoallocoverrideen=1, write-no-alloc=1 - Do not
+	 *      allocate lines on write miss
+	 */
+	__raw_writel(0x3, cx + A6XX_GPU_CX_MISC_SYSTEM_CACHE_CNTL_0);
+	/* Make sure the above write posts before we proceed */
+	wmb();
+	iounmap(cx);
+}
+
 #define A6XX_INT_MASK \
 	((1 << A6XX_INT_CP_AHB_ERROR) |		\
 	 (1 << A6XX_INT_ATB_ASYNCFIFO_OVERFLOW) |	\
@@ -691,4 +761,6 @@
 	.regulator_disable = a6xx_sptprac_disable,
 	.microcode_read = a6xx_microcode_read,
 	.enable_64bit = a6xx_enable_64bit,
+	.llc_configure_gpu_scid = a6xx_llc_configure_gpu_scid,
+	.llc_enable_overrides = a6xx_llc_enable_overrides
 };
diff --git a/drivers/gpu/msm/adreno_llc.h b/drivers/gpu/msm/adreno_llc.h
new file mode 100644
index 0000000..f018bc0
--- /dev/null
+++ b/drivers/gpu/msm/adreno_llc.h
@@ -0,0 +1,91 @@
+/* Copyright (c) 2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef __ADRENO_LLC_H
+#define __ADRENO_LLC_H
+
+#ifdef CONFIG_QCOM_LLCC
+#include "adreno.h"
+#include <linux/soc/qcom/llcc-qcom.h>
+
+static inline bool adreno_llc_supported(void)
+{
+	return true;
+}
+
+static inline void *adreno_llc_getd(struct device *dev, const char *name)
+{
+	return llcc_slice_getd(dev, name);
+}
+
+static inline void adreno_llc_putd(void *desc)
+{
+	llcc_slice_putd(desc);
+}
+
+static inline int adreno_llc_deactivate_slice(void *desc)
+{
+	return llcc_slice_deactivate(desc);
+}
+
+/* Return the slice ID that llcc_get_slice_id() reports for @desc */
+static inline int adreno_llc_get_scid(void *desc)
+{
+	return llcc_get_slice_id(desc);
+}
+
+static inline void adreno_llc_setup(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+
+	if (!adreno_dev->gpu_llc_slice ||
+			llcc_slice_activate(adreno_dev->gpu_llc_slice))
+		return;
+	if (gpudev->llc_configure_gpu_scid)
+		gpudev->llc_configure_gpu_scid(adreno_dev);
+	if (gpudev->llc_enable_overrides)
+		gpudev->llc_enable_overrides(adreno_dev);
+}
+
+#else
+static inline bool adreno_llc_supported(void)
+{
+	return false;
+}
+
+static inline void *adreno_llc_getd(struct device *dev,
+		const char *name)
+{
+	return NULL;
+}
+
+static inline void adreno_llc_putd(void *desc)
+{
+}
+
+static inline int adreno_llc_deactivate_slice(void *desc)
+{
+	return 0;
+}
+
+static inline int adreno_llc_get_scid(void *desc)
+{
+	return 0;
+}
+
+static inline void adreno_llc_setup(struct kgsl_device *device)
+{
+}
+#endif
+
+#endif /* __ADRENO_LLC_H */
diff --git a/drivers/gpu/msm/kgsl_iommu.c b/drivers/gpu/msm/kgsl_iommu.c
index b32cb63..fdb6e0e 100644
--- a/drivers/gpu/msm/kgsl_iommu.c
+++ b/drivers/gpu/msm/kgsl_iommu.c
@@ -41,6 +41,14 @@
 	(((_a) >= KGSL_IOMMU_GLOBAL_MEM_BASE) && \
 	 ((_a) < (KGSL_IOMMU_GLOBAL_MEM_BASE + KGSL_IOMMU_GLOBAL_MEM_SIZE)))
 
+/*
+ * Flag to set SMMU memory attributes required to
+ * enable system cache for GPU transactions.
+ */
+#ifndef IOMMU_USE_UPSTREAM_HINT
+#define IOMMU_USE_UPSTREAM_HINT 0
+#endif
+
 static struct kgsl_mmu_pt_ops iommu_pt_ops;
 static bool need_iommu_sync;
 
@@ -1709,7 +1717,8 @@
 
 static unsigned int _get_protection_flags(struct kgsl_memdesc *memdesc)
 {
-	unsigned int flags = IOMMU_READ | IOMMU_WRITE | IOMMU_NOEXEC;
+	unsigned int flags = IOMMU_READ | IOMMU_WRITE |
+		IOMMU_NOEXEC | IOMMU_USE_UPSTREAM_HINT;
 
 	if (memdesc->flags & KGSL_MEMFLAGS_GPUREADONLY)
 		flags &= ~IOMMU_WRITE;