msm: kgsl: Support user-specified caching hints
The user allocating memory can provide hints for the caching
settings to be used when the buffer is mapped on the user side.
Try to obey those cache settings when applicable. The user is then
responsible for cache management both to and from the GPU, so add
a new ioctl, IOCTL_KGSL_GPUMEM_SYNC_CACHE, that supports both
directions; the old IOCTL_KGSL_SHAREDMEM_FLUSH_CACHE only handled
flushing, not invalidating. The legacy ioctl still works, and it
does exactly what its name says: a cache flush.
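
As an illustration, a client that lets the GPU write into a cached
buffer could invalidate the CPU cache before reading the results
back. This is a minimal sketch; the fd (an open /dev/kgsl-3d0) and
the buffer id returned by the allocation ioctl are assumed:

	#include <sys/ioctl.h>
	#include <linux/msm_kgsl.h>

	static int sync_from_gpu(int fd, unsigned int id)
	{
		/* remaining fields (gpuaddr, __pad) are zeroed */
		struct kgsl_gpumem_sync_cache sync = {
			.id = id,
			.op = KGSL_GPUMEM_CACHE_FROM_GPU,
		};

		return ioctl(fd, IOCTL_KGSL_GPUMEM_SYNC_CACHE, &sync);
	}
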
Change-Id: Ic0dedbad55ce82f2b01ebc56de30d4649e2e7311
Signed-off-by: Jordan Crouse <jcrouse@codeaurora.org>
diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c
index eca6a2a..0b445d6 100644
--- a/drivers/gpu/msm/kgsl.c
+++ b/drivers/gpu/msm/kgsl.c
@@ -1906,6 +1906,8 @@
/*
* Mask off unknown flags from userspace. This way the caller can
* check if a flag is supported by looking at the returned flags.
+ * Note: CACHEMODE is ignored for this call. Caching should be
 + * determined by the type of allocation being mapped.
*/
param->flags &= KGSL_MEMFLAGS_GPUREADONLY
| KGSL_MEMTYPE_MASK
@@ -2023,34 +2025,93 @@
return result;
}
-/*This function flushes a graphics memory allocation from CPU cache
- *when caching is enabled with MMU*/
+static int _kgsl_gpumem_sync_cache(struct kgsl_mem_entry *entry, int op)
+{
+ int ret = 0;
+ int cacheop;
+ int mode;
+
+ /*
+ * Flush is defined as (clean | invalidate). If both bits are set, then
+ * do a flush, otherwise check for the individual bits and clean or inv
+ * as requested
+ */
+
+ if ((op & KGSL_GPUMEM_CACHE_FLUSH) == KGSL_GPUMEM_CACHE_FLUSH)
+ cacheop = KGSL_CACHE_OP_FLUSH;
+ else if (op & KGSL_GPUMEM_CACHE_CLEAN)
+ cacheop = KGSL_CACHE_OP_CLEAN;
+ else if (op & KGSL_GPUMEM_CACHE_INV)
+ cacheop = KGSL_CACHE_OP_INV;
+ else {
+ ret = -EINVAL;
+ goto done;
+ }
+
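+	/*
+	 * Uncached and write-combined mappings bypass the CPU cache, so
+	 * there is nothing to clean or invalidate for them
+	 */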
+ mode = kgsl_memdesc_get_cachemode(&entry->memdesc);
+ if (mode != KGSL_CACHEMODE_UNCACHED
+ && mode != KGSL_CACHEMODE_WRITECOMBINE)
+ kgsl_cache_range_op(&entry->memdesc, cacheop);
+
+done:
+ return ret;
+}
+
+/* New cache sync function - supports both directions (clean and invalidate) */
+
+static long
+kgsl_ioctl_gpumem_sync_cache(struct kgsl_device_private *dev_priv,
+ unsigned int cmd, void *data)
+{
+ struct kgsl_gpumem_sync_cache *param = data;
+ struct kgsl_process_private *private = dev_priv->process_priv;
+ struct kgsl_mem_entry *entry = NULL;
+
+ if (param->id != 0) {
+ entry = kgsl_sharedmem_find_id(private, param->id);
+ if (entry == NULL) {
+ KGSL_MEM_INFO(dev_priv->device, "can't find id %d\n",
+ param->id);
+ return -EINVAL;
+ }
+ } else if (param->gpuaddr != 0) {
+ spin_lock(&private->mem_lock);
+ entry = kgsl_sharedmem_find(private, param->gpuaddr);
+ spin_unlock(&private->mem_lock);
+ if (entry == NULL) {
+ KGSL_MEM_INFO(dev_priv->device,
+ "can't find gpuaddr %x\n",
+ param->gpuaddr);
+ return -EINVAL;
+ }
+ } else {
+ return -EINVAL;
+ }
+
+ return _kgsl_gpumem_sync_cache(entry, param->op);
+}
+
+/* Legacy cache function, does a flush (clean + inv) */
+
static long
kgsl_ioctl_sharedmem_flush_cache(struct kgsl_device_private *dev_priv,
unsigned int cmd, void *data)
{
- int result = 0;
- struct kgsl_mem_entry *entry;
struct kgsl_sharedmem_free *param = data;
struct kgsl_process_private *private = dev_priv->process_priv;
+ struct kgsl_mem_entry *entry = NULL;
spin_lock(&private->mem_lock);
entry = kgsl_sharedmem_find(private, param->gpuaddr);
- if (!entry) {
- KGSL_CORE_ERR("invalid gpuaddr %08x\n", param->gpuaddr);
- result = -EINVAL;
- goto done;
- }
- if (!entry->memdesc.hostptr) {
- KGSL_CORE_ERR("invalid hostptr with gpuaddr %08x\n",
- param->gpuaddr);
- goto done;
+ spin_unlock(&private->mem_lock);
+ if (entry == NULL) {
+ KGSL_MEM_INFO(dev_priv->device,
+ "can't find gpuaddr %x\n",
+ param->gpuaddr);
+ return -EINVAL;
}
- kgsl_cache_range_op(&entry->memdesc, KGSL_CACHE_OP_CLEAN);
-done:
- spin_unlock(&private->mem_lock);
- return result;
+ return _kgsl_gpumem_sync_cache(entry, KGSL_GPUMEM_CACHE_FLUSH);
}
/*
@@ -2070,6 +2131,7 @@
* check if a flag is supported by looking at the returned flags.
*/
flags &= KGSL_MEMFLAGS_GPUREADONLY
+ | KGSL_CACHEMODE_MASK
| KGSL_MEMTYPE_MASK
| KGSL_MEMALIGN_MASK;
@@ -2409,6 +2471,8 @@
kgsl_ioctl_gpumem_free_id, 0),
KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_GET_INFO,
kgsl_ioctl_gpumem_get_info, 0),
+ KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_SYNC_CACHE,
+ kgsl_ioctl_gpumem_sync_cache, 0),
};
static long kgsl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
@@ -2578,7 +2642,7 @@
static int kgsl_mmap(struct file *file, struct vm_area_struct *vma)
{
- unsigned int ret;
+ unsigned int ret, cache;
unsigned long vma_offset = vma->vm_pgoff << PAGE_SHIFT;
struct kgsl_device_private *dev_priv = file->private_data;
struct kgsl_process_private *private = dev_priv->process_priv;
@@ -2624,7 +2688,27 @@
vma->vm_flags |= entry->memdesc.ops->vmflags(&entry->memdesc);
vma->vm_private_data = entry;
- vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+
+ /* Determine user-side caching policy */
+
+ cache = kgsl_memdesc_get_cachemode(&entry->memdesc);
+
+ switch (cache) {
+ case KGSL_CACHEMODE_UNCACHED:
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+ break;
+ case KGSL_CACHEMODE_WRITETHROUGH:
+ vma->vm_page_prot = pgprot_writethroughcache(vma->vm_page_prot);
+ break;
+ case KGSL_CACHEMODE_WRITEBACK:
+ vma->vm_page_prot = pgprot_writebackcache(vma->vm_page_prot);
+ break;
+ case KGSL_CACHEMODE_WRITECOMBINE:
+ default:
+ vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+ break;
+ }
+
vma->vm_ops = &kgsl_gpumem_vm_ops;
vma->vm_file = file;
diff --git a/drivers/gpu/msm/kgsl_debugfs.c b/drivers/gpu/msm/kgsl_debugfs.c
index 6144bcf..d4834e5 100644
--- a/drivers/gpu/msm/kgsl_debugfs.c
+++ b/drivers/gpu/msm/kgsl_debugfs.c
@@ -221,12 +221,23 @@
return '-';
}
+static char get_cacheflag(const struct kgsl_memdesc *m)
+{
+ static const char table[] = {
+ [KGSL_CACHEMODE_WRITECOMBINE] = '-',
+ [KGSL_CACHEMODE_UNCACHED] = 'u',
+ [KGSL_CACHEMODE_WRITEBACK] = 'b',
+ [KGSL_CACHEMODE_WRITETHROUGH] = 't',
+ };
+ return table[kgsl_memdesc_get_cachemode(m)];
+}
+
static int process_mem_print(struct seq_file *s, void *unused)
{
struct kgsl_mem_entry *entry;
struct rb_node *node;
struct kgsl_process_private *private = s->private;
- char flags[4];
+ char flags[5];
char usage[16];
spin_lock(&private->mem_lock);
@@ -241,7 +252,8 @@
flags[0] = kgsl_memdesc_is_global(m) ? 'g' : '-';
flags[1] = m->flags & KGSL_MEMFLAGS_GPUREADONLY ? 'r' : '-';
flags[2] = get_alignflag(m);
- flags[3] = '\0';
+ flags[3] = get_cacheflag(m);
+ flags[4] = '\0';
kgsl_get_memory_usage(usage, sizeof(usage), m->flags);
diff --git a/drivers/gpu/msm/kgsl_sharedmem.c b/drivers/gpu/msm/kgsl_sharedmem.c
index a2db0bd..11ca0b0 100644
--- a/drivers/gpu/msm/kgsl_sharedmem.c
+++ b/drivers/gpu/msm/kgsl_sharedmem.c
@@ -511,7 +511,14 @@
void kgsl_cache_range_op(struct kgsl_memdesc *memdesc, int op)
{
- void *addr = memdesc->hostptr;
+ /*
+ * If the buffer is mapped in the kernel operate on that address
+ * otherwise use the user address
+ */
+
+ void *addr = (memdesc->hostptr) ?
+ memdesc->hostptr : (void *) memdesc->useraddr;
+
int size = memdesc->size;
if (addr != NULL) {
diff --git a/drivers/gpu/msm/kgsl_sharedmem.h b/drivers/gpu/msm/kgsl_sharedmem.h
index 3868266..f07f049 100644
--- a/drivers/gpu/msm/kgsl_sharedmem.h
+++ b/drivers/gpu/msm/kgsl_sharedmem.h
@@ -83,6 +83,18 @@
}
/*
+ * kgsl_memdesc_get_cachemode - Get cache mode of a memdesc
+ * @memdesc: the memdesc
+ *
+ * Returns a KGSL_CACHEMODE* value.
+ */
+static inline int
+kgsl_memdesc_get_cachemode(const struct kgsl_memdesc *memdesc)
+{
+ return (memdesc->flags & KGSL_CACHEMODE_MASK) >> KGSL_CACHEMODE_SHIFT;
+}
+
+/*
* kgsl_memdesc_set_align - Set alignment flags of a memdesc
* @memdesc - the memdesc
* @align - alignment requested, as a power of 2 exponent.
diff --git a/include/linux/msm_kgsl.h b/include/linux/msm_kgsl.h
index b3323ed..ee136cac 100644
--- a/include/linux/msm_kgsl.h
+++ b/include/linux/msm_kgsl.h
@@ -1,6 +1,13 @@
#ifndef _MSM_KGSL_H
#define _MSM_KGSL_H
+/*
+ * The KGSL version has proven not to be very useful in userspace if features
+ * are cherry-picked into other trees out of order, so it is frozen as of 3.14.
+ * It is left here for backwards compatibility and as a reminder that
+ * software releases are never linear. Also, I like pie.
+ */
+
#define KGSL_VERSION_MAJOR 3
#define KGSL_VERSION_MINOR 14
@@ -16,13 +23,24 @@
#define KGSL_CONTEXT_INVALID 0xffffffff
-/* Memory allocayion flags */
-#define KGSL_MEMFLAGS_GPUREADONLY 0x01000000
+/* --- Memory allocation flags --- */
+/* General allocation hints */
+#define KGSL_MEMFLAGS_GPUREADONLY 0x01000000
+
+/* Memory caching hints */
+#define KGSL_CACHEMODE_MASK 0x0C000000
+#define KGSL_CACHEMODE_SHIFT 26
+
+#define KGSL_CACHEMODE_WRITECOMBINE 0
+#define KGSL_CACHEMODE_UNCACHED 1
+#define KGSL_CACHEMODE_WRITETHROUGH 2
+#define KGSL_CACHEMODE_WRITEBACK 3
+
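+/*
+ * The cache mode for an allocation is encoded in the flags word; for
+ * example, to request a writeback-cached user mapping:
+ *   flags |= KGSL_CACHEMODE_WRITEBACK << KGSL_CACHEMODE_SHIFT;
+ */
+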
+/* Memory types for which allocations are made */
#define KGSL_MEMTYPE_MASK 0x0000FF00
#define KGSL_MEMTYPE_SHIFT 8
-/* Memory types for which allocations are made */
#define KGSL_MEMTYPE_OBJECTANY 0
#define KGSL_MEMTYPE_FRAMEBUFFER 1
#define KGSL_MEMTYPE_RENDERBUFFER 2
@@ -53,7 +71,8 @@
#define KGSL_MEMALIGN_MASK 0x00FF0000
#define KGSL_MEMALIGN_SHIFT 16
-/* generic flag values */
+/* --- generic KGSL flag values --- */
+
#define KGSL_FLAGS_NORMALMODE 0x00000000
#define KGSL_FLAGS_SAFEMODE 0x00000001
#define KGSL_FLAGS_INITIALIZED0 0x00000002
@@ -419,6 +438,14 @@
#define IOCTL_KGSL_SHAREDMEM_FROM_VMALLOC \
_IOWR(KGSL_IOC_TYPE, 0x23, struct kgsl_sharedmem_from_vmalloc)
+/*
+ * This is being deprecated in favor of IOCTL_KGSL_GPUMEM_SYNC_CACHE, which
+ * supports both directions (clean and invalidate). This code will still
+ * work, but by definition it will do a flush of the cache, which might not
+ * be what you want to happen on a buffer following a GPU operation. It is
+ * safer to go with IOCTL_KGSL_GPUMEM_SYNC_CACHE.
+ */
+
#define IOCTL_KGSL_SHAREDMEM_FLUSH_CACHE \
_IOW(KGSL_IOC_TYPE, 0x24, struct kgsl_sharedmem_free)
@@ -587,6 +614,37 @@
#define IOCTL_KGSL_GPUMEM_GET_INFO\
_IOWR(KGSL_IOC_TYPE, 0x36, struct kgsl_gpumem_get_info)
+/**
+ * struct kgsl_gpumem_sync_cache - argument to IOCTL_KGSL_GPUMEM_SYNC_CACHE
+ * @gpuaddr: GPU address of the buffer to sync.
+ * @id: id of the buffer to sync. Either gpuaddr or id is sufficient.
+ * @op: a mask of KGSL_GPUMEM_CACHE_* values
+ *
+ * Sync the L2 cache for memory headed to and from the GPU. This replaces
+ * IOCTL_KGSL_SHAREDMEM_FLUSH_CACHE since it can handle cache management in
+ * both directions.
+ *
+ */
+struct kgsl_gpumem_sync_cache {
+ unsigned int gpuaddr;
+ unsigned int id;
+ unsigned int op;
+/* private: reserved for future use */
+ unsigned int __pad[2]; /* For future binary compatibility */
+};
+
+#define KGSL_GPUMEM_CACHE_CLEAN (1 << 0)
+#define KGSL_GPUMEM_CACHE_TO_GPU KGSL_GPUMEM_CACHE_CLEAN
+
+#define KGSL_GPUMEM_CACHE_INV (1 << 1)
+#define KGSL_GPUMEM_CACHE_FROM_GPU KGSL_GPUMEM_CACHE_INV
+
+#define KGSL_GPUMEM_CACHE_FLUSH \
+ (KGSL_GPUMEM_CACHE_CLEAN | KGSL_GPUMEM_CACHE_INV)
+
+#define IOCTL_KGSL_GPUMEM_SYNC_CACHE \
+ _IOW(KGSL_IOC_TYPE, 0x37, struct kgsl_gpumem_sync_cache)
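+
+/*
+ * Example (sketch, fd and id assumed): clean a cached buffer before
+ * the GPU reads from it:
+ *
+ *	struct kgsl_gpumem_sync_cache sync = {
+ *		.id = id,
+ *		.op = KGSL_GPUMEM_CACHE_TO_GPU,
+ *	};
+ *	ioctl(fd, IOCTL_KGSL_GPUMEM_SYNC_CACHE, &sync);
+ */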
+
#ifdef __KERNEL__
#ifdef CONFIG_MSM_KGSL_DRM
int kgsl_gem_obj_addr(int drm_fd, int handle, unsigned long *start,