Merge "msm: kgsl: Switch pagetables in stream for IOMMU" into msm-3.0
diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c
index 1432ccf..2f503ae 100644
--- a/drivers/gpu/msm/adreno.c
+++ b/drivers/gpu/msm/adreno.c
@@ -21,6 +21,7 @@
#include "kgsl_pwrscale.h"
#include "kgsl_cffdump.h"
#include "kgsl_sharedmem.h"
+#include "kgsl_iommu.h"
#include "adreno.h"
#include "adreno_pm4types.h"
@@ -243,7 +244,146 @@
return result;
}
-static void adreno_setstate(struct kgsl_device *device,
+static void adreno_iommu_setstate(struct kgsl_device *device,
+ uint32_t flags)
+{
+ unsigned int pt_val, reg_pt_val;
+ unsigned int link[200];
+ unsigned int *cmds = &link[0];
+ int sizedwords = 0;
+ struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+ struct kgsl_memdesc **reg_map_desc;
+ void *reg_map_array;
+ int num_iommu_units, i;
+
+ if (!adreno_dev->drawctxt_active) {
+ kgsl_mmu_device_setstate(&device->mmu, flags);
+ return;
+ }
+ num_iommu_units = kgsl_mmu_get_reg_map_desc(&device->mmu,
+ &reg_map_array);
+ reg_map_desc = reg_map_array;
+
+ if (kgsl_mmu_enable_clk(&device->mmu,
+ KGSL_IOMMU_CONTEXT_USER))
+ goto done;
+
+ if (adreno_is_a225(adreno_dev))
+ cmds += adreno_add_change_mh_phys_limit_cmds(cmds, 0xFFFFF000,
+ device->mmu.setstate_memory.gpuaddr +
+ KGSL_IOMMU_SETSTATE_NOP_OFFSET);
+ else
+ cmds += adreno_add_bank_change_cmds(cmds,
+ KGSL_IOMMU_CONTEXT_USER,
+ device->mmu.setstate_memory.gpuaddr +
+ KGSL_IOMMU_SETSTATE_NOP_OFFSET);
+
+ if (flags & KGSL_MMUFLAGS_PTUPDATE) {
+ pt_val = kgsl_mmu_pt_get_base_addr(device->mmu.hwpagetable);
+ /*
+ * We need to perform the following operations for all
+ * IOMMU units
+ */
+ for (i = 0; i < num_iommu_units; i++) {
+ reg_pt_val = (pt_val &
+ (KGSL_IOMMU_TTBR0_PA_MASK <<
+ KGSL_IOMMU_TTBR0_PA_SHIFT)) +
+ kgsl_mmu_get_pt_lsb(&device->mmu, i,
+ KGSL_IOMMU_CONTEXT_USER);
+ /*
+ * Set the address of the new pagetable by writing to the IOMMU
+ * TTBR0 register
+ */
+ *cmds++ = cp_type3_packet(CP_MEM_WRITE, 2);
+ *cmds++ = reg_map_desc[i]->gpuaddr +
+ (KGSL_IOMMU_CONTEXT_USER <<
+ KGSL_IOMMU_CTX_SHIFT) + KGSL_IOMMU_TTBR0;
+ *cmds++ = reg_pt_val;
+ *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+ *cmds++ = 0x00000000;
+
+ /*
+ * Read back the TTBR0 register as a barrier to ensure the
+ * above writes have completed
+ */
+ cmds += adreno_add_read_cmds(device, cmds,
+ reg_map_desc[i]->gpuaddr +
+ (KGSL_IOMMU_CONTEXT_USER <<
+ KGSL_IOMMU_CTX_SHIFT) + KGSL_IOMMU_TTBR0,
+ reg_pt_val,
+ device->mmu.setstate_memory.gpuaddr +
+ KGSL_IOMMU_SETSTATE_NOP_OFFSET);
+
+ /* set the asid */
+ *cmds++ = cp_type3_packet(CP_MEM_WRITE, 2);
+ *cmds++ = reg_map_desc[i]->gpuaddr +
+ (KGSL_IOMMU_CONTEXT_USER <<
+ KGSL_IOMMU_CTX_SHIFT) + KGSL_IOMMU_CONTEXTIDR;
+ *cmds++ = kgsl_mmu_get_hwpagetable_asid(&device->mmu);
+ *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+ *cmds++ = 0x00000000;
+
+ /* Read back the ASID to ensure the above write completes */
+ cmds += adreno_add_read_cmds(device, cmds,
+ reg_map_desc[i]->gpuaddr +
+ (KGSL_IOMMU_CONTEXT_USER <<
+ KGSL_IOMMU_CTX_SHIFT) + KGSL_IOMMU_CONTEXTIDR,
+ kgsl_mmu_get_hwpagetable_asid(&device->mmu),
+ device->mmu.setstate_memory.gpuaddr +
+ KGSL_IOMMU_SETSTATE_NOP_OFFSET);
+ }
+ /* invalidate all base pointers */
+ *cmds++ = cp_type3_packet(CP_INVALIDATE_STATE, 1);
+ *cmds++ = 0x7fff;
+
+ if (flags & KGSL_MMUFLAGS_TLBFLUSH)
+ cmds += __adreno_add_idle_indirect_cmds(cmds,
+ device->mmu.setstate_memory.gpuaddr +
+ KGSL_IOMMU_SETSTATE_NOP_OFFSET);
+ }
+ if (flags & KGSL_MMUFLAGS_TLBFLUSH) {
+ /*
+ * Flush the TLB based on the ASID; no need to flush the entire TLB
+ */
+ for (i = 0; i < num_iommu_units; i++) {
+ *cmds++ = cp_type3_packet(CP_MEM_WRITE, 2);
+ *cmds++ = (reg_map_desc[i]->gpuaddr +
+ (KGSL_IOMMU_CONTEXT_USER <<
+ KGSL_IOMMU_CTX_SHIFT) +
+ KGSL_IOMMU_CTX_TLBIASID);
+ *cmds++ = kgsl_mmu_get_hwpagetable_asid(&device->mmu);
+ cmds += adreno_add_read_cmds(device, cmds,
+ reg_map_desc[i]->gpuaddr +
+ (KGSL_IOMMU_CONTEXT_USER <<
+ KGSL_IOMMU_CTX_SHIFT) +
+ KGSL_IOMMU_CONTEXTIDR,
+ kgsl_mmu_get_hwpagetable_asid(&device->mmu),
+ device->mmu.setstate_memory.gpuaddr +
+ KGSL_IOMMU_SETSTATE_NOP_OFFSET);
+ }
+ }
+
+ if (adreno_is_a225(adreno_dev))
+ cmds += adreno_add_change_mh_phys_limit_cmds(cmds,
+ reg_map_desc[num_iommu_units - 1]->gpuaddr - PAGE_SIZE,
+ device->mmu.setstate_memory.gpuaddr +
+ KGSL_IOMMU_SETSTATE_NOP_OFFSET);
+ else
+ cmds += adreno_add_bank_change_cmds(cmds,
+ KGSL_IOMMU_CONTEXT_PRIV,
+ device->mmu.setstate_memory.gpuaddr +
+ KGSL_IOMMU_SETSTATE_NOP_OFFSET);
+
+ sizedwords += (cmds - &link[0]);
+ if (sizedwords)
+ adreno_ringbuffer_issuecmds(device,
+ KGSL_CMD_FLAGS_PMODE, &link[0], sizedwords);
+done:
+ if (num_iommu_units)
+ kfree(reg_map_array);
+}
+
+static void adreno_gpummu_setstate(struct kgsl_device *device,
uint32_t flags)
{
struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
@@ -253,16 +391,6 @@
unsigned int mh_mmu_invalidate = 0x00000003; /*invalidate all and tc */
/*
- * A3XX doesn't support the fast path (the registers don't even exist)
- * so just bail out early
- */
-
- if (adreno_is_a3xx(adreno_dev)) {
- kgsl_mmu_device_setstate(&device->mmu, flags);
- return;
- }
-
- /*
* If possible, then set the state via the command stream to avoid
* a CPU idle. Otherwise, use the default setstate which uses register
* writes For CFF dump we must idle and use the registers so that it is
@@ -348,6 +476,16 @@
}
}
+static void adreno_setstate(struct kgsl_device *device,
+ uint32_t flags)
+{
+ /* Call the MMU-specific handler */
+ if (KGSL_MMU_TYPE_GPU == kgsl_mmu_get_mmutype())
+ adreno_gpummu_setstate(device, flags);
+ else if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_get_mmutype())
+ adreno_iommu_setstate(device, flags);
+}
+
static unsigned int
a3xx_getchipid(struct kgsl_device *device)
{
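
adreno_iommu_setstate() builds the whole pagetable switch as an in-stream PM4
sequence: packets are appended through a moving pointer into a local array,
and the dword count is recovered as a pointer difference before submission.
Below is a minimal host-side sketch of that pattern; the packet header, the
CTX_SHIFT/TTBR0 offsets, and all addresses are placeholders, not the real
kgsl_iommu.h values.

    #include <stdio.h>
    #include <stdint.h>

    #define CTX_SHIFT 12     /* stand-in for KGSL_IOMMU_CTX_SHIFT */
    #define TTBR0_OFF 0x10   /* stand-in for KGSL_IOMMU_TTBR0 */
    #define CTX_USER  1      /* stand-in for KGSL_IOMMU_CONTEXT_USER */

    /* Append a fake "write memory" packet: header, address, value */
    static uint32_t *emit_mem_write(uint32_t *cmds, uint32_t addr,
                                    uint32_t val)
    {
        *cmds++ = 0xdeadbeef;   /* placeholder packet header */
        *cmds++ = addr;
        *cmds++ = val;
        return cmds;
    }

    int main(void)
    {
        uint32_t link[200];
        uint32_t *cmds = link;
        /* made-up GPU addresses of two IOMMU register maps */
        uint32_t reg_map[2] = { 0x66020000, 0x66010000 };
        uint32_t pt_base = 0x8f000000;
        int i;

        for (i = 0; i < 2; i++) {
            /*
             * Each context bank occupies a (1 << CTX_SHIFT)-byte
             * window, so the user context's TTBR0 register sits at
             * this offset inside the unit's register map.
             */
            uint32_t ttbr0 = reg_map[i] +
                (CTX_USER << CTX_SHIFT) + TTBR0_OFF;
            cmds = emit_mem_write(cmds, ttbr0, pt_base);
        }

        /* sizedwords is just the pointer distance, as in the patch */
        printf("sizedwords = %ld\n", (long)(cmds - link));
        return 0;
    }
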
diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h
index a7ea20c..4ce56a4 100644
--- a/drivers/gpu/msm/adreno.h
+++ b/drivers/gpu/msm/adreno.h
@@ -16,6 +16,7 @@
#include "kgsl_device.h"
#include "adreno_drawctxt.h"
#include "adreno_ringbuffer.h"
+#include "kgsl_iommu.h"
#define DEVICE_3D_NAME "kgsl-3d"
#define DEVICE_3D0_NAME "kgsl-3d0"
@@ -223,4 +224,72 @@
return (ilog2(size) - 5) << 29;
}
+static inline int __adreno_add_idle_indirect_cmds(unsigned int *cmds,
+ unsigned int nop_gpuaddr)
+{
+ /* Adding an indirect buffer ensures that the prefetch stalls until
+ * the commands in the indirect buffer have completed. We need to
+ * stall the prefetch with a nop indirect buffer when updating
+ * pagetables because it provides more stable synchronization */
+ *cmds++ = CP_HDR_INDIRECT_BUFFER_PFD;
+ *cmds++ = nop_gpuaddr;
+ *cmds++ = 2;
+ *cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+ *cmds++ = 0x00000000;
+ return 5;
+}
+
+static inline int adreno_add_change_mh_phys_limit_cmds(unsigned int *cmds,
+ unsigned int new_phys_limit,
+ unsigned int nop_gpuaddr)
+{
+ unsigned int *start = cmds;
+
+ cmds += __adreno_add_idle_indirect_cmds(cmds, nop_gpuaddr);
+ *cmds++ = cp_type0_packet(MH_MMU_MPU_END, 1);
+ *cmds++ = new_phys_limit;
+ cmds += __adreno_add_idle_indirect_cmds(cmds, nop_gpuaddr);
+ return cmds - start;
+}
+
+static inline int adreno_add_bank_change_cmds(unsigned int *cmds,
+ int cur_ctx_bank,
+ unsigned int nop_gpuaddr)
+{
+ unsigned int *start = cmds;
+
+ cmds += __adreno_add_idle_indirect_cmds(cmds, nop_gpuaddr);
+ *cmds++ = cp_type0_packet(REG_CP_STATE_DEBUG_INDEX, 1);
+ *cmds++ = (cur_ctx_bank ? 0 : 0x20);
+ cmds += __adreno_add_idle_indirect_cmds(cmds, nop_gpuaddr);
+ return cmds - start;
+}
+
+/*
+ * adreno_add_read_cmds - Add pm4 packets that poll a GPU address
+ * @device - Pointer to the device structure
+ * @cmds - Pointer to memory where the read commands are added
+ * @addr - GPU address to poll
+ * @val - The GPU waits until the data at address addr becomes
+ * equal to this value
+ * @nop_gpuaddr - GPU address of the nop indirect buffer used to
+ * stall the prefetch parser
+ */
+static inline int adreno_add_read_cmds(struct kgsl_device *device,
+ unsigned int *cmds, unsigned int addr,
+ unsigned int val, unsigned int nop_gpuaddr)
+{
+ unsigned int *start = cmds;
+
+ *cmds++ = cp_type3_packet(CP_WAIT_REG_MEM, 5);
+ /* MEM SPACE = memory, FUNCTION = equals */
+ *cmds++ = 0x13;
+ *cmds++ = addr;
+ *cmds++ = val;
+ *cmds++ = 0xFFFFFFFF;
+ *cmds++ = 0xFFFFFFFF;
+ cmds += __adreno_add_idle_indirect_cmds(cmds, nop_gpuaddr);
+ return cmds - start;
+}
+
#endif /*__ADRENO_H */
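
adreno_add_read_cmds() above encodes a CP_WAIT_REG_MEM packet with a flags
dword of 0x13 (per the in-line comment: MEM space, function "equals"),
followed by the poll address, the reference value, and two 0xFFFFFFFF
dwords. A toy host-side model of the comparison the CP keeps re-evaluating;
treating the fourth packet dword as a compare mask is an assumption for
illustration.

    #include <assert.h>
    #include <stdint.h>

    /*
     * Host-side model of the CP_WAIT_REG_MEM "equals" check used by
     * adreno_add_read_cmds(); the mask semantics are assumed.
     */
    static int wait_reg_mem_done(uint32_t mem_val, uint32_t ref,
                                 uint32_t mask)
    {
        /* The CP stalls until this condition becomes true */
        return (mem_val & mask) == (ref & mask);
    }

    int main(void)
    {
        /* With mask 0xFFFFFFFF the whole dword must match, which is
         * what makes the read-back act as a completion barrier. */
        assert(!wait_reg_mem_done(0x00000000, 0x8f000001, 0xFFFFFFFF));
        assert(wait_reg_mem_done(0x8f000001, 0x8f000001, 0xFFFFFFFF));
        return 0;
    }
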
diff --git a/drivers/gpu/msm/adreno_a2xx.c b/drivers/gpu/msm/adreno_a2xx.c
index eb936f8..ae846da 100644
--- a/drivers/gpu/msm/adreno_a2xx.c
+++ b/drivers/gpu/msm/adreno_a2xx.c
@@ -1848,8 +1848,13 @@
/* NQ and External Memory Swap */
GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
- /* Protected mode error checking */
- GSL_RB_WRITE(cmds, cmds_gpu, GSL_RB_PROTECTED_MODE_CONTROL);
+ /* Protected mode error checking
+ * If the IOMMU is used, protected mode needs to be turned off
+ * to enable context bank switching */
+ if (KGSL_MMU_TYPE_IOMMU == kgsl_mmu_get_mmutype())
+ GSL_RB_WRITE(cmds, cmds_gpu, 0);
+ else
+ GSL_RB_WRITE(cmds, cmds_gpu, GSL_RB_PROTECTED_MODE_CONTROL);
/* Disable header dumping and Header dump address */
GSL_RB_WRITE(cmds, cmds_gpu, 0x00000000);
/* Header dump size */
diff --git a/drivers/gpu/msm/kgsl_iommu.c b/drivers/gpu/msm/kgsl_iommu.c
index fdd4aa5..fb65565 100644
--- a/drivers/gpu/msm/kgsl_iommu.c
+++ b/drivers/gpu/msm/kgsl_iommu.c
@@ -595,7 +595,9 @@
kgsl_regwrite(mmu->device, MH_MMU_CONFIG, 0x00000001);
kgsl_regwrite(mmu->device, MH_MMU_MPU_END,
mh->mpu_base +
- iommu->iommu_units[0].reg_map.gpuaddr - PAGE_SIZE);
+ iommu->iommu_units
+ [iommu->unit_count - 1].reg_map.gpuaddr -
+ PAGE_SIZE);
} else {
kgsl_regwrite(mmu->device, MH_MMU_CONFIG, 0x00000000);
}
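
This hunk only changes which unit's register map bounds the MPU window: with
more than one IOMMU unit the window has to end below every GPU-mapped
register bank, and the a225 path added in adreno.c makes the same
[num_iommu_units - 1] choice, presumably because the maps are set up so the
last unit carries the lowest gpuaddr. A sketch of the computation with
made-up addresses:

    #include <stdio.h>
    #include <stdint.h>

    #define PAGE_SIZE 4096u

    struct iommu_unit {
        uint32_t reg_map_gpuaddr;   /* GPU address of the register map */
    };

    int main(void)
    {
        /* made-up layout: the later unit sits lower in GPU memory */
        struct iommu_unit units[] = {
            { 0x66020000 },
            { 0x66010000 },
        };
        int unit_count = 2;
        uint32_t mpu_base = 0x00001000;

        /* Stop the MPU window one page below the last unit's map,
         * mirroring the fixed expression above. */
        uint32_t mpu_end = mpu_base +
            units[unit_count - 1].reg_map_gpuaddr - PAGE_SIZE;
        printf("MH_MMU_MPU_END = 0x%08x\n", mpu_end);
        return 0;
    }
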
diff --git a/drivers/gpu/msm/kgsl_mmu.c b/drivers/gpu/msm/kgsl_mmu.c
index 663ba0f..b882807 100644
--- a/drivers/gpu/msm/kgsl_mmu.c
+++ b/drivers/gpu/msm/kgsl_mmu.c
@@ -545,8 +545,7 @@
struct kgsl_device *device = mmu->device;
if (KGSL_MMU_TYPE_NONE == kgsl_mmu_type)
return;
- else if (device->ftbl->setstate && (KGSL_MMU_TYPE_IOMMU !=
- kgsl_mmu_type))
+ else if (device->ftbl->setstate)
device->ftbl->setstate(device, flags);
else if (mmu->mmu_ops->mmu_device_setstate)
mmu->mmu_ops->mmu_device_setstate(mmu, flags);
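
With the IOMMU exclusion dropped, kgsl_mmu_device_setstate() now hands every
MMU type to the device-level setstate (which adreno_setstate() routes to the
GPU MMU or IOMMU variant) and only falls back to mmu_ops when no device
handler exists. A compressed model of that dispatch order; the types and
handler names here are stand-ins, not the real kgsl structures.

    #include <stdio.h>

    struct ftbl { void (*setstate)(unsigned int flags); };
    struct mmu_ops { void (*mmu_device_setstate)(unsigned int flags); };

    static void stream_setstate(unsigned int flags)
    {
        printf("in-stream setstate, flags=0x%x\n", flags);
    }

    static void reg_setstate(unsigned int flags)
    {
        printf("register setstate, flags=0x%x\n", flags);
    }

    /* Dispatch order after the patch: device handler first, mmu ops second */
    static void device_setstate(const struct ftbl *f, const struct mmu_ops *m,
                                unsigned int flags)
    {
        if (f->setstate)
            f->setstate(flags);
        else if (m->mmu_device_setstate)
            m->mmu_device_setstate(flags);
    }

    int main(void)
    {
        struct ftbl f = { stream_setstate };
        struct mmu_ops m = { reg_setstate };

        device_setstate(&f, &m, 0x1);   /* prefers the in-stream path */
        f.setstate = 0;
        device_setstate(&f, &m, 0x2);   /* falls back to registers */
        return 0;
    }
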
diff --git a/drivers/gpu/msm/kgsl_pwrctrl.c b/drivers/gpu/msm/kgsl_pwrctrl.c
index 3a29d71..2aaefba 100644
--- a/drivers/gpu/msm/kgsl_pwrctrl.c
+++ b/drivers/gpu/msm/kgsl_pwrctrl.c
@@ -706,6 +706,7 @@
}
kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF);
kgsl_pwrctrl_clk(device, KGSL_PWRFLAGS_OFF, KGSL_STATE_NAP);
+ kgsl_mmu_disable_clk(&device->mmu);
kgsl_pwrctrl_set_state(device, KGSL_STATE_NAP);
if (device->idle_wakelock.name)
wake_unlock(&device->idle_wakelock);
@@ -749,6 +750,7 @@
gpu_freq);
_sleep_accounting(device);
kgsl_pwrctrl_clk(device, KGSL_PWRFLAGS_OFF, KGSL_STATE_SLEEP);
+ kgsl_mmu_disable_clk(&device->mmu);
kgsl_pwrctrl_set_state(device, KGSL_STATE_SLEEP);
wake_unlock(&device->idle_wakelock);
pm_qos_update_request(&device->pm_qos_req_dma,
@@ -888,6 +890,7 @@
/* Order pwrrail/clk sequence based upon platform */
kgsl_pwrctrl_axi(device, KGSL_PWRFLAGS_OFF);
kgsl_pwrctrl_clk(device, KGSL_PWRFLAGS_OFF, KGSL_STATE_SLEEP);
+ kgsl_mmu_disable_clk(&device->mmu);
kgsl_pwrctrl_pwrrail(device, KGSL_PWRFLAGS_OFF);
}
EXPORT_SYMBOL(kgsl_pwrctrl_disable);
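
The three kgsl_mmu_disable_clk() calls added above are the release side of
the kgsl_mmu_enable_clk() taken in adreno_iommu_setstate(): the IOMMU clocks
come on for the in-stream register writes and are dropped on every
transition to NAP or SLEEP and on full disable. A toy model of that pairing;
the bare counter is an assumption, standing in for whatever per-context
bookkeeping the real kgsl_iommu code performs.

    #include <assert.h>
    #include <stdio.h>

    /* Toy enable/disable pairing for the IOMMU clocks */
    static int iommu_clk_on;

    static int mmu_enable_clk(void)
    {
        iommu_clk_on++;
        return 0;       /* the real call can fail; this sketch cannot */
    }

    static void mmu_disable_clk(void)
    {
        if (iommu_clk_on > 0)
            iommu_clk_on--;
    }

    int main(void)
    {
        mmu_enable_clk();       /* adreno_iommu_setstate() */
        /* ... in-stream TTBR0/ASID writes execute here ... */
        mmu_disable_clk();      /* NAP/SLEEP/disable transition */
        assert(iommu_clk_on == 0);
        printf("IOMMU clocks balanced\n");
        return 0;
    }
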