Merge "msm: kgsl: Synchronize access to IOMMU cfg port"
diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c
index 373c517..55597fc 100644
--- a/drivers/gpu/msm/adreno.c
+++ b/drivers/gpu/msm/adreno.c
@@ -141,6 +141,7 @@
*/
#define ANY_ID (~0)
+#define NO_VER (~0)
static const struct {
enum adreno_gpurev gpurev;
@@ -150,45 +151,53 @@
struct adreno_gpudev *gpudev;
unsigned int istore_size;
unsigned int pix_shader_start;
- unsigned int instruction_size; /* Size of an instruction in dwords */
- unsigned int gmem_size; /* size of gmem for gpu*/
+ /* Size of an instruction in dwords */
+ unsigned int instruction_size;
+ /* size of GMEM for the GPU */
+ unsigned int gmem_size;
+ /* version of pm4 microcode that supports sync_lock
+ between CPU and GPU for SMMU-v1 programming */
+ unsigned int sync_lock_pm4_ver;
+ /* version of pfp microcode that supports sync_lock
+ between CPU and GPU for SMMU-v1 programming */
+ unsigned int sync_lock_pfp_ver;
} adreno_gpulist[] = {
{ ADRENO_REV_A200, 0, 2, ANY_ID, ANY_ID,
"yamato_pm4.fw", "yamato_pfp.fw", &adreno_a2xx_gpudev,
- 512, 384, 3, SZ_256K },
+ 512, 384, 3, SZ_256K, NO_VER, NO_VER },
{ ADRENO_REV_A203, 0, 1, 1, ANY_ID,
"yamato_pm4.fw", "yamato_pfp.fw", &adreno_a2xx_gpudev,
- 512, 384, 3, SZ_256K },
+ 512, 384, 3, SZ_256K, NO_VER, NO_VER },
{ ADRENO_REV_A205, 0, 1, 0, ANY_ID,
"yamato_pm4.fw", "yamato_pfp.fw", &adreno_a2xx_gpudev,
- 512, 384, 3, SZ_256K },
+ 512, 384, 3, SZ_256K, NO_VER, NO_VER },
{ ADRENO_REV_A220, 2, 1, ANY_ID, ANY_ID,
"leia_pm4_470.fw", "leia_pfp_470.fw", &adreno_a2xx_gpudev,
- 512, 384, 3, SZ_512K },
+ 512, 384, 3, SZ_512K, NO_VER, NO_VER },
/*
* patchlevel 5 (8960v2) needs special pm4 firmware to work around
* a hardware problem.
*/
{ ADRENO_REV_A225, 2, 2, 0, 5,
"a225p5_pm4.fw", "a225_pfp.fw", &adreno_a2xx_gpudev,
- 1536, 768, 3, SZ_512K },
+ 1536, 768, 3, SZ_512K, NO_VER, NO_VER },
{ ADRENO_REV_A225, 2, 2, 0, 6,
"a225_pm4.fw", "a225_pfp.fw", &adreno_a2xx_gpudev,
- 1536, 768, 3, SZ_512K },
+ 1536, 768, 3, SZ_512K, 0x225011, 0x225002 },
{ ADRENO_REV_A225, 2, 2, ANY_ID, ANY_ID,
"a225_pm4.fw", "a225_pfp.fw", &adreno_a2xx_gpudev,
- 1536, 768, 3, SZ_512K },
+ 1536, 768, 3, SZ_512K, 0x225011, 0x225002 },
/* A3XX doesn't use the pix_shader_start */
{ ADRENO_REV_A305, 3, 0, 5, ANY_ID,
"a300_pm4.fw", "a300_pfp.fw", &adreno_a3xx_gpudev,
- 512, 0, 2, SZ_256K },
+ 512, 0, 2, SZ_256K, 0x3FF037, 0x3FF016 },
/* A3XX doesn't use the pix_shader_start */
{ ADRENO_REV_A320, 3, 2, ANY_ID, ANY_ID,
"a300_pm4.fw", "a300_pfp.fw", &adreno_a3xx_gpudev,
- 512, 0, 2, SZ_512K },
+ 512, 0, 2, SZ_512K, 0x3FF037, 0x3FF016 },
{ ADRENO_REV_A330, 3, 3, 0, 0,
"a330_pm4.fw", "a330_pfp.fw", &adreno_a3xx_gpudev,
- 512, 0, 2, SZ_1M },
+ 512, 0, 2, SZ_1M, NO_VER, NO_VER },
};
static irqreturn_t adreno_irq_handler(struct kgsl_device *device)
@@ -282,7 +291,7 @@
uint32_t flags)
{
unsigned int pt_val, reg_pt_val;
- unsigned int link[200];
+ unsigned int link[250];
unsigned int *cmds = &link[0];
int sizedwords = 0;
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
@@ -315,6 +324,11 @@
device->mmu.setstate_memory.gpuaddr +
KGSL_IOMMU_SETSTATE_NOP_OFFSET);
+ cmds += adreno_add_idle_cmds(adreno_dev, cmds);
+
+ /* Acquire GPU-CPU sync Lock here */
+ cmds += kgsl_mmu_sync_lock(&device->mmu, cmds);
+
pt_val = kgsl_mmu_get_pt_base_addr(&device->mmu,
device->mmu.hwpagetable);
if (flags & KGSL_MMUFLAGS_PTUPDATE) {
@@ -376,6 +390,9 @@
}
}
+ /* Release GPU-CPU sync Lock here */
+ cmds += kgsl_mmu_sync_unlock(&device->mmu, cmds);
+
if (cpu_is_msm8960())
cmds += adreno_add_change_mh_phys_limit_cmds(cmds,
kgsl_mmu_get_reg_gpuaddr(&device->mmu, 0,
@@ -388,6 +405,8 @@
device->mmu.setstate_memory.gpuaddr +
KGSL_IOMMU_SETSTATE_NOP_OFFSET);
+ cmds += adreno_add_idle_cmds(adreno_dev, cmds);
+
sizedwords += (cmds - &link[0]);
if (sizedwords) {
/* invalidate all base pointers */
@@ -402,6 +421,11 @@
kgsl_mmu_disable_clk_on_ts(&device->mmu,
adreno_dev->ringbuffer.timestamp[KGSL_MEMSTORE_GLOBAL], true);
}
+
+ if (sizedwords > (ARRAY_SIZE(link))) {
+ KGSL_DRV_ERR(device, "Temp command buffer overflow\n");
+ BUG();
+ }
}
static void adreno_gpummu_setstate(struct kgsl_device *device,
@@ -637,6 +661,7 @@
adreno_dev->pix_shader_start = adreno_gpulist[i].pix_shader_start;
adreno_dev->instruction_size = adreno_gpulist[i].instruction_size;
adreno_dev->gmem_size = adreno_gpulist[i].gmem_size;
+ adreno_dev->gpulist_index = i;
}
static struct platform_device_id adreno_id_table[] = {
@@ -1186,12 +1211,36 @@
/* Identify the specific GPU */
adreno_identify_gpu(adreno_dev);
+ if (adreno_ringbuffer_read_pm4_ucode(device)) {
+ KGSL_DRV_ERR(device, "Reading pm4 microcode failed %s\n",
+ adreno_dev->pm4_fwfile);
+ BUG_ON(1);
+ }
+
+ if (adreno_ringbuffer_read_pfp_ucode(device)) {
+ KGSL_DRV_ERR(device, "Reading pfp microcode failed %s\n",
+ adreno_dev->pfp_fwfile);
+ BUG_ON(1);
+ }
+
if (adreno_dev->gpurev == ADRENO_REV_UNKNOWN) {
KGSL_DRV_ERR(device, "Unknown chip ID %x\n",
adreno_dev->chip_id);
goto error_clk_off;
}
+
+ /*
+ * Check if firmware supports the sync lock PM4 packets needed
+ * for IOMMUv1
+ */
+
+ if ((adreno_dev->pm4_fw_version >=
+ adreno_gpulist[adreno_dev->gpulist_index].sync_lock_pm4_ver) &&
+ (adreno_dev->pfp_fw_version >=
+ adreno_gpulist[adreno_dev->gpulist_index].sync_lock_pfp_ver))
+ device->mmu.flags |= KGSL_MMU_FLAGS_IOMMU_SYNC;
+
/* Set up the MMU */
if (adreno_is_a2xx(adreno_dev)) {
/*
diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h
index cf16995..836192c 100644
--- a/drivers/gpu/msm/adreno.h
+++ b/drivers/gpu/msm/adreno.h
@@ -86,9 +86,11 @@
const char *pfp_fwfile;
unsigned int *pfp_fw;
size_t pfp_fw_size;
+ unsigned int pfp_fw_version;
const char *pm4_fwfile;
unsigned int *pm4_fw;
size_t pm4_fw_size;
+ unsigned int pm4_fw_version;
struct adreno_ringbuffer ringbuffer;
unsigned int mharb;
struct adreno_gpudev *gpudev;
@@ -98,6 +100,7 @@
unsigned int instruction_size;
unsigned int ib_check_level;
unsigned int fast_hang_detect;
+ unsigned int gpulist_index;
struct ocmem_buf *ocmem_hdl;
unsigned int ocmem_base;
};
@@ -366,4 +369,26 @@
return cmds - start;
}
+/*
+ * adreno_add_idle_cmds - Add pm4 packets for GPU idle
+ * @adreno_dev - Pointer to device structure
+ * @cmds - Pointer to memory where idle commands need to be added
+ */
+static inline int adreno_add_idle_cmds(struct adreno_device *adreno_dev,
+ unsigned int *cmds)
+{
+ unsigned int *start = cmds;
+
+ *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+ *cmds++ = 0;
+
+ if ((adreno_dev->gpurev == ADRENO_REV_A305) ||
+ (adreno_dev->gpurev == ADRENO_REV_A320)) {
+ *cmds++ = cp_type3_packet(CP_WAIT_FOR_ME, 1);
+ *cmds++ = 0;
+ }
+
+ return cmds - start;
+}
+
#endif /*__ADRENO_H */
diff --git a/drivers/gpu/msm/adreno_pm4types.h b/drivers/gpu/msm/adreno_pm4types.h
index 016862b..6ec11ea 100644
--- a/drivers/gpu/msm/adreno_pm4types.h
+++ b/drivers/gpu/msm/adreno_pm4types.h
@@ -142,6 +142,12 @@
/* copy sequencer instruction memory to system memory */
#define CP_IM_STORE 0x2c
+/* test 2 memory locations to dword values specified */
+#define CP_TEST_TWO_MEMS 0x71
+
+/* PFP waits until the FIFO between the PFP and the ME is empty */
+#define CP_WAIT_FOR_ME 0x13
+
/*
* for a20x
* program an offset that will added to the BIN_BASE value of
diff --git a/drivers/gpu/msm/adreno_ringbuffer.c b/drivers/gpu/msm/adreno_ringbuffer.c
index 8af361a..97b35b0 100644
--- a/drivers/gpu/msm/adreno_ringbuffer.c
+++ b/drivers/gpu/msm/adreno_ringbuffer.c
@@ -209,10 +209,10 @@
return (*data != NULL) ? 0 : -ENOMEM;
}
-static int adreno_ringbuffer_load_pm4_ucode(struct kgsl_device *device)
+int adreno_ringbuffer_read_pm4_ucode(struct kgsl_device *device)
{
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
- int i, ret = 0;
+ int ret = 0;
if (adreno_dev->pm4_fw == NULL) {
int len;
@@ -234,24 +234,41 @@
adreno_dev->pm4_fw_size = len / sizeof(uint32_t);
adreno_dev->pm4_fw = ptr;
+ adreno_dev->pm4_fw_version = adreno_dev->pm4_fw[1];
+ }
+
+err:
+ return ret;
+}
+
+
+int adreno_ringbuffer_load_pm4_ucode(struct kgsl_device *device)
+{
+ struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+ int i;
+
+ if (adreno_dev->pm4_fw == NULL) {
+ int ret = adreno_ringbuffer_read_pm4_ucode(device);
+ if (ret)
+ return ret;
}
KGSL_DRV_INFO(device, "loading pm4 ucode version: %d\n",
- adreno_dev->pm4_fw[0]);
+ adreno_dev->pm4_fw_version);
adreno_regwrite(device, REG_CP_DEBUG, CP_DEBUG_DEFAULT);
adreno_regwrite(device, REG_CP_ME_RAM_WADDR, 0);
for (i = 1; i < adreno_dev->pm4_fw_size; i++)
adreno_regwrite(device, REG_CP_ME_RAM_DATA,
- adreno_dev->pm4_fw[i]);
-err:
- return ret;
+ adreno_dev->pm4_fw[i]);
+
+ return 0;
}
-static int adreno_ringbuffer_load_pfp_ucode(struct kgsl_device *device)
+int adreno_ringbuffer_read_pfp_ucode(struct kgsl_device *device)
{
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
- int i, ret = 0;
+ int ret = 0;
if (adreno_dev->pfp_fw == NULL) {
int len;
@@ -272,18 +289,34 @@
adreno_dev->pfp_fw_size = len / sizeof(uint32_t);
adreno_dev->pfp_fw = ptr;
+ adreno_dev->pfp_fw_version = adreno_dev->pfp_fw[5];
+ }
+
+err:
+ return ret;
+}
+
+int adreno_ringbuffer_load_pfp_ucode(struct kgsl_device *device)
+{
+ struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+ int i;
+
+ if (adreno_dev->pfp_fw == NULL) {
+ int ret = adreno_ringbuffer_read_pfp_ucode(device);
+ if (ret)
+ return ret;
}
KGSL_DRV_INFO(device, "loading pfp ucode version: %d\n",
- adreno_dev->pfp_fw[0]);
+ adreno_dev->pfp_fw_version);
adreno_regwrite(device, adreno_dev->gpudev->reg_cp_pfp_ucode_addr, 0);
for (i = 1; i < adreno_dev->pfp_fw_size; i++)
adreno_regwrite(device,
- adreno_dev->gpudev->reg_cp_pfp_ucode_data,
- adreno_dev->pfp_fw[i]);
-err:
- return ret;
+ adreno_dev->gpudev->reg_cp_pfp_ucode_data,
+ adreno_dev->pfp_fw[i]);
+
+ return 0;
}
int adreno_ringbuffer_start(struct adreno_ringbuffer *rb, unsigned int init_ram)
@@ -390,7 +423,6 @@
GSL_RB_MEMPTRS_SCRATCH_MASK);
/* load the CP ucode */
-
status = adreno_ringbuffer_load_pm4_ucode(device);
if (status != 0)
return status;
diff --git a/drivers/gpu/msm/adreno_ringbuffer.h b/drivers/gpu/msm/adreno_ringbuffer.h
index 4f58a15..50d9c25 100644
--- a/drivers/gpu/msm/adreno_ringbuffer.h
+++ b/drivers/gpu/msm/adreno_ringbuffer.h
@@ -130,6 +130,10 @@
struct adreno_context *context,
unsigned int numcmds);
+int adreno_ringbuffer_read_pfp_ucode(struct kgsl_device *device);
+
+int adreno_ringbuffer_read_pm4_ucode(struct kgsl_device *device);
+
static inline int adreno_ringbuffer_count(struct adreno_ringbuffer *rb,
unsigned int rptr)
{
diff --git a/drivers/gpu/msm/kgsl_iommu.c b/drivers/gpu/msm/kgsl_iommu.c
index 31491d5..b647ae2 100644
--- a/drivers/gpu/msm/kgsl_iommu.c
+++ b/drivers/gpu/msm/kgsl_iommu.c
@@ -18,6 +18,9 @@
#include <linux/iommu.h>
#include <linux/msm_kgsl.h>
#include <mach/socinfo.h>
+#include <mach/msm_iomap.h>
+#include <mach/board.h>
+#include <stddef.h>
#include "kgsl.h"
#include "kgsl_device.h"
@@ -27,6 +30,8 @@
#include "adreno_pm4types.h"
#include "adreno.h"
#include "kgsl_trace.h"
+#include "z180.h"
+
static struct kgsl_iommu_register_list kgsl_iommuv1_reg[KGSL_IOMMU_REG_MAX] = {
{ 0, 0, 0 }, /* GLOBAL_BASE */
@@ -46,6 +51,8 @@
{ 0x008, 0, 0 } /* RESUME */
};
+struct remote_iommu_petersons_spinlock kgsl_iommu_sync_lock_vars;
+
static int get_iommu_unit(struct device *dev, struct kgsl_mmu **mmu_out,
struct kgsl_iommu_unit **iommu_unit_out)
{
@@ -546,6 +553,195 @@
}
/*
+ * kgsl_iommu_start_sync_lock - Initialize some variables during MMU start up
+ * for GPU CPU synchronization
+ * @mmu - Pointer to mmu device
+ *
+ * Return - 0 on success else error code
+ */
+static int kgsl_iommu_start_sync_lock(struct kgsl_mmu *mmu)
+{
+ struct kgsl_iommu *iommu = mmu->priv;
+ uint32_t lock_gpu_addr = 0;
+
+ if (KGSL_DEVICE_3D0 != mmu->device->id ||
+ !msm_soc_version_supports_iommu_v1() ||
+ !kgsl_mmu_is_perprocess() ||
+ iommu->sync_lock_vars)
+ return 0;
+
+ if (!(mmu->flags & KGSL_MMU_FLAGS_IOMMU_SYNC)) {
+ KGSL_DRV_ERR(mmu->device,
+ "The GPU microcode does not support IOMMUv1 sync opcodes\n");
+ return -ENXIO;
+ }
+ /* Store Lock variables GPU address */
+ lock_gpu_addr = (iommu->sync_lock_desc.gpuaddr +
+ iommu->sync_lock_offset);
+
+ kgsl_iommu_sync_lock_vars.flag[PROC_APPS] = (lock_gpu_addr +
+ (offsetof(struct remote_iommu_petersons_spinlock,
+ flag[PROC_APPS])));
+ kgsl_iommu_sync_lock_vars.flag[PROC_GPU] = (lock_gpu_addr +
+ (offsetof(struct remote_iommu_petersons_spinlock,
+ flag[PROC_GPU])));
+ kgsl_iommu_sync_lock_vars.turn = (lock_gpu_addr +
+ (offsetof(struct remote_iommu_petersons_spinlock, turn)));
+
+ iommu->sync_lock_vars = &kgsl_iommu_sync_lock_vars;
+
+ return 0;
+}
+
+/*
+ * kgsl_iommu_init_sync_lock - Init Sync Lock between GPU and CPU
+ * @mmu - Pointer to mmu device
+ *
+ * Return - 0 on success else error code
+ */
+static int kgsl_iommu_init_sync_lock(struct kgsl_mmu *mmu)
+{
+ struct kgsl_iommu *iommu = mmu->device->mmu.priv;
+ int status = 0;
+ uint32_t lock_phy_addr = 0;
+ uint32_t page_offset = 0;
+
+ if (KGSL_DEVICE_3D0 != mmu->device->id ||
+ !msm_soc_version_supports_iommu_v1() ||
+ !kgsl_mmu_is_perprocess())
+ return status;
+
+ /* Return if already initialized */
+ if (iommu->sync_lock_initialized)
+ return status;
+
+ /* Get the physical address of the Lock variables */
+ lock_phy_addr = (msm_iommu_lock_initialize()
+ - MSM_SHARED_RAM_BASE + msm_shared_ram_phys);
+
+ if (!lock_phy_addr) {
+ KGSL_DRV_ERR(mmu->device,
+ "GPU CPU sync lock is not supported by kernel\n");
+ return -ENXIO;
+ }
+
+ /* Align the physical address to PAGE boundary and store the offset */
+ page_offset = (lock_phy_addr & (PAGE_SIZE - 1));
+ lock_phy_addr = (lock_phy_addr & ~(PAGE_SIZE - 1));
+ iommu->sync_lock_desc.physaddr = (unsigned int)lock_phy_addr;
+ iommu->sync_lock_offset = page_offset;
+
+ iommu->sync_lock_desc.size =
+ PAGE_ALIGN(sizeof(kgsl_iommu_sync_lock_vars));
+ status = memdesc_sg_phys(&iommu->sync_lock_desc,
+ iommu->sync_lock_desc.physaddr,
+ iommu->sync_lock_desc.size);
+
+ if (status)
+ return status;
+
+ /* Flag Sync Lock is Initialized */
+ iommu->sync_lock_initialized = 1;
+
+ return status;
+}
+
+/*
+ * kgsl_iommu_sync_lock - Acquire Sync Lock between GPU and CPU
+ * @mmu - Pointer to mmu device
+ * @cmds - Pointer to array of commands
+ *
+ * Return - int - number of commands.
+ */
+inline unsigned int kgsl_iommu_sync_lock(struct kgsl_mmu *mmu,
+ unsigned int *cmds)
+{
+ struct kgsl_device *device = mmu->device;
+ struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+ struct kgsl_iommu *iommu = mmu->device->mmu.priv;
+ struct remote_iommu_petersons_spinlock *lock_vars =
+ iommu->sync_lock_vars;
+ unsigned int *start = cmds;
+
+ if (!iommu->sync_lock_initialized)
+ return 0;
+
+ *cmds++ = cp_type3_packet(CP_MEM_WRITE, 2);
+ *cmds++ = lock_vars->flag[PROC_GPU];
+ *cmds++ = 1;
+
+ cmds += adreno_add_idle_cmds(adreno_dev, cmds);
+
+ *cmds++ = cp_type3_packet(CP_WAIT_REG_MEM, 5);
+ /* MEM SPACE = memory, FUNCTION = equals */
+ *cmds++ = 0x13;
+ *cmds++ = lock_vars->flag[PROC_GPU];
+ *cmds++ = 0x1;
+ *cmds++ = 0x1;
+ *cmds++ = 0x1;
+
+ *cmds++ = cp_type3_packet(CP_MEM_WRITE, 2);
+ *cmds++ = lock_vars->turn;
+ *cmds++ = 0;
+
+ cmds += adreno_add_idle_cmds(adreno_dev, cmds);
+
+ *cmds++ = cp_type3_packet(CP_WAIT_REG_MEM, 5);
+ /* MEM SPACE = memory, FUNCTION = equals */
+ *cmds++ = 0x13;
+ *cmds++ = lock_vars->flag[PROC_GPU];
+ *cmds++ = 0x1;
+ *cmds++ = 0x1;
+ *cmds++ = 0x1;
+
+ *cmds++ = cp_type3_packet(CP_TEST_TWO_MEMS, 3);
+ *cmds++ = lock_vars->flag[PROC_APPS];
+ *cmds++ = lock_vars->turn;
+ *cmds++ = 0;
+
+ cmds += adreno_add_idle_cmds(adreno_dev, cmds);
+
+ return cmds - start;
+}
+
+/*
+ * kgsl_iommu_sync_unlock - Release Sync Lock between GPU and CPU
+ * @mmu - Pointer to mmu device
+ * @cmds - Pointer to array of commands
+ *
+ * Return - int - number of commands.
+ */
+inline unsigned int kgsl_iommu_sync_unlock(struct kgsl_mmu *mmu,
+ unsigned int *cmds)
+{
+ struct kgsl_device *device = mmu->device;
+ struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+ struct kgsl_iommu *iommu = mmu->device->mmu.priv;
+ struct remote_iommu_petersons_spinlock *lock_vars =
+ iommu->sync_lock_vars;
+ unsigned int *start = cmds;
+
+ if (!iommu->sync_lock_initialized)
+ return 0;
+
+ *cmds++ = cp_type3_packet(CP_MEM_WRITE, 2);
+ *cmds++ = lock_vars->flag[PROC_GPU];
+ *cmds++ = 0;
+
+ *cmds++ = cp_type3_packet(CP_WAIT_REG_MEM, 5);
+ /* MEM SPACE = memory, FUNCTION = equals */
+ *cmds++ = 0x13;
+ *cmds++ = lock_vars->flag[PROC_GPU];
+ *cmds++ = 0x0;
+ *cmds++ = 0x1;
+ *cmds++ = 0x1;
+
+ cmds += adreno_add_idle_cmds(adreno_dev, cmds);
+
+ return cmds - start;
+}
+
+/*
* kgsl_get_iommu_ctxt - Get device pointer to IOMMU contexts
* @mmu - Pointer to mmu device
*
@@ -727,23 +923,27 @@
return 0;
for (i = 0; i < iommu->unit_count; i++) {
- iommu->iommu_units[i].reg_map.priv |= KGSL_MEMDESC_GLOBAL;
- status = kgsl_mmu_map(pt,
+ status = kgsl_mmu_map_global(pt,
&(iommu->iommu_units[i].reg_map),
GSL_PT_PAGE_RV | GSL_PT_PAGE_WV);
- if (status) {
- iommu->iommu_units[i].reg_map.priv &=
- ~KGSL_MEMDESC_GLOBAL;
+ if (status)
goto err;
- }
}
+
+ /* Map Lock variables to GPU pagetable */
+ if (iommu->sync_lock_initialized) {
+ status = kgsl_mmu_map_global(pt, &iommu->sync_lock_desc,
+ GSL_PT_PAGE_RV | GSL_PT_PAGE_WV);
+ if (status)
+ goto err;
+ }
+
return 0;
err:
- for (i--; i >= 0; i--) {
+ for (i--; i >= 0; i--)
kgsl_mmu_unmap(pt,
&(iommu->iommu_units[i].reg_map));
- iommu->iommu_units[i].reg_map.priv &= ~KGSL_MEMDESC_GLOBAL;
- }
+
return status;
}
@@ -763,6 +963,9 @@
int i;
for (i = 0; i < iommu->unit_count; i++)
kgsl_mmu_unmap(pt, &(iommu->iommu_units[i].reg_map));
+
+ if (iommu->sync_lock_desc.gpuaddr)
+ kgsl_mmu_unmap(pt, &iommu->sync_lock_desc);
}
@@ -790,6 +993,9 @@
status = kgsl_set_register_map(mmu);
if (status)
goto done;
+ status = kgsl_iommu_init_sync_lock(mmu);
+ if (status)
+ goto done;
iommu->iommu_reg_list = kgsl_iommuv1_reg;
iommu->ctx_offset = KGSL_IOMMU_CTX_OFFSET_V1;
@@ -887,6 +1093,10 @@
if (status)
return -ENOMEM;
}
+ status = kgsl_iommu_start_sync_lock(mmu);
+ if (status)
+ return status;
+
/* We use the GPU MMU to control access to IOMMU registers on 8960 with
* a225, hence we still keep the MMU active on 8960 */
if (cpu_is_msm8960()) {
@@ -1068,7 +1278,12 @@
if (reg_map->hostptr)
iounmap(reg_map->hostptr);
kgsl_sg_free(reg_map->sg, reg_map->sglen);
+ reg_map->priv &= ~KGSL_MEMDESC_GLOBAL;
}
+ /* clear IOMMU GPU CPU sync structures */
+ kgsl_sg_free(iommu->sync_lock_desc.sg, iommu->sync_lock_desc.sglen);
+ memset(&iommu->sync_lock_desc, 0, sizeof(iommu->sync_lock_desc));
+ iommu->sync_lock_vars = NULL;
kfree(iommu);
@@ -1125,6 +1340,9 @@
pt_base &= (iommu->iommu_reg_list[KGSL_IOMMU_CTX_TTBR0].reg_mask <<
iommu->iommu_reg_list[KGSL_IOMMU_CTX_TTBR0].reg_shift);
+ /* Acquire GPU-CPU sync Lock here */
+ msm_iommu_lock();
+
if (flags & KGSL_MMUFLAGS_PTUPDATE) {
kgsl_idle(mmu->device);
for (i = 0; i < iommu->unit_count; i++) {
@@ -1151,6 +1369,10 @@
mb();
}
}
+
+ /* Release GPU-CPU sync Lock here */
+ msm_iommu_unlock();
+
/* Disable smmu clock */
kgsl_iommu_disable_clk_on_ts(mmu, 0, false);
}
@@ -1203,6 +1425,8 @@
/* These callbacks will be set on some chipsets */
.mmu_setup_pt = NULL,
.mmu_cleanup_pt = NULL,
+ .mmu_sync_lock = kgsl_iommu_sync_lock,
+ .mmu_sync_unlock = kgsl_iommu_sync_unlock,
};
struct kgsl_mmu_pt_ops iommu_pt_ops = {
diff --git a/drivers/gpu/msm/kgsl_iommu.h b/drivers/gpu/msm/kgsl_iommu.h
index 661b4f0..f539b07 100644
--- a/drivers/gpu/msm/kgsl_iommu.h
+++ b/drivers/gpu/msm/kgsl_iommu.h
@@ -120,6 +120,13 @@
* @ctx_offset: The context offset to be added to base address when
* accessing IOMMU registers
* @iommu_reg_list: List of IOMMU registers { offset, map, shift } array
+ * @sync_lock_vars: Pointer to the IOMMU spinlock for serializing access to the
+ * IOMMU registers
+ * @sync_lock_desc: GPU Memory descriptor for the memory containing the
+ * spinlocks
+ * @sync_lock_offset: The page offset within a page at which the sync
+ * variables are located
+ * @sync_lock_initialized: True if the sync_lock feature is enabled
*/
struct kgsl_iommu {
struct kgsl_iommu_unit iommu_units[KGSL_IOMMU_MAX_UNITS];
@@ -129,6 +136,10 @@
struct kgsl_device *device;
unsigned int ctx_offset;
struct kgsl_iommu_register_list *iommu_reg_list;
+ struct remote_iommu_petersons_spinlock *sync_lock_vars;
+ struct kgsl_memdesc sync_lock_desc;
+ unsigned int sync_lock_offset;
+ bool sync_lock_initialized;
};
/*
diff --git a/drivers/gpu/msm/kgsl_mmu.h b/drivers/gpu/msm/kgsl_mmu.h
index c8d637e..9d96633 100644
--- a/drivers/gpu/msm/kgsl_mmu.h
+++ b/drivers/gpu/msm/kgsl_mmu.h
@@ -152,6 +152,10 @@
struct kgsl_pagetable *pt);
void (*mmu_cleanup_pt) (struct kgsl_mmu *mmu,
struct kgsl_pagetable *pt);
+ unsigned int (*mmu_sync_lock)
+ (struct kgsl_mmu *mmu, unsigned int *cmds);
+ unsigned int (*mmu_sync_unlock)
+ (struct kgsl_mmu *mmu, unsigned int *cmds);
};
struct kgsl_mmu_pt_ops {
@@ -166,6 +170,8 @@
void (*mmu_destroy_pagetable) (void *pt);
};
+#define KGSL_MMU_FLAGS_IOMMU_SYNC BIT(31)
+
struct kgsl_mmu {
unsigned int refcnt;
uint32_t flags;
@@ -398,4 +404,24 @@
return 0;
}
+static inline int kgsl_mmu_sync_lock(struct kgsl_mmu *mmu,
+ unsigned int *cmds)
+{
+ if ((mmu->flags & KGSL_MMU_FLAGS_IOMMU_SYNC) &&
+ mmu->mmu_ops && mmu->mmu_ops->mmu_sync_lock)
+ return mmu->mmu_ops->mmu_sync_lock(mmu, cmds);
+ else
+ return 0;
+}
+
+static inline int kgsl_mmu_sync_unlock(struct kgsl_mmu *mmu,
+ unsigned int *cmds)
+{
+ if ((mmu->flags & KGSL_MMU_FLAGS_IOMMU_SYNC) &&
+ mmu->mmu_ops && mmu->mmu_ops->mmu_sync_unlock)
+ return mmu->mmu_ops->mmu_sync_unlock(mmu, cmds);
+ else
+ return 0;
+}
+
#endif /* __KGSL_MMU_H */