msm: kgsl: avoid vmap failure while allocating big buffers
Limit the number of pages vmapped at one time during allocation
to avoid requesting more vmalloc space than is likely to be available.
CRs-Fixed: 445005
Change-Id: Icee5912687edf4da585d88308ee0e1c971964785
Signed-off-by: Jeremy Gebben <jgebben@codeaurora.org>
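
For readers who want the new zeroing path in one piece, the sketch below shows
the approach the patch takes, pulled out of its KGSL context: zero and
cache-flush the freshly allocated pages in vmap()ed chunks of 'step' pages,
and whenever vmap() cannot find enough contiguous vmalloc space, fall back to
per-page kmap_atomic() for that chunk and halve 'step' for the next one. The
helper name and its standalone form are illustrative only; in the driver this
loop lives inline in kgsl_sharedmem.c.

#include <linux/mm.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/highmem.h>
#include <asm/cacheflush.h>

/* Illustrative helper (not part of the patch): zero and flush 'pcount'
 * freshly allocated pages in vmap()ed chunks instead of one huge mapping.
 */
static void zero_pages_chunked(struct page **pages, int pcount)
{
	pgprot_t page_prot = pgprot_writecombine(PAGE_KERNEL);
	/* first guess: map at most 1/8th of the vmalloc region at a time */
	int step = ((VMALLOC_END - VMALLOC_START) / 8) >> PAGE_SHIFT;
	int j;

	for (j = 0; j < pcount; j += step) {
		void *ptr;

		step = min(step, pcount - j);
		ptr = vmap(&pages[j], step, VM_IOREMAP, page_prot);

		if (ptr != NULL) {
			/* fast path: one memset + cache flush per chunk */
			memset(ptr, 0, step * PAGE_SIZE);
			dmac_flush_range(ptr, ptr + step * PAGE_SIZE);
			vunmap(ptr);
		} else {
			int k;

			/* vmalloc space is tight: zero this chunk page by
			 * page with kmap_atomic() ...
			 */
			for (k = j; k < j + step; k++) {
				ptr = kmap_atomic(pages[k]);
				memset(ptr, 0, PAGE_SIZE);
				dmac_flush_range(ptr, ptr + PAGE_SIZE);
				kunmap_atomic(ptr);
			}
			/* ... and halve the chunk so later vmap()s fit.
			 * j then advances by the reduced step, so part of
			 * this chunk may be revisited; pages are only ever
			 * re-zeroed, never skipped.
			 */
			if (step > 1)
				step >>= 1;
		}
	}
}
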
diff --git a/drivers/gpu/msm/kgsl_sharedmem.c b/drivers/gpu/msm/kgsl_sharedmem.c
index ab2fd75..b97004a 100644
--- a/drivers/gpu/msm/kgsl_sharedmem.c
+++ b/drivers/gpu/msm/kgsl_sharedmem.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2012, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2002,2007-2013, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -549,6 +549,7 @@
 	pgprot_t page_prot = pgprot_writecombine(PAGE_KERNEL);
 	void *ptr;
 	unsigned int align;
+	int step = ((VMALLOC_END - VMALLOC_START)/8) >> PAGE_SHIFT;
 
 	align = (memdesc->flags & KGSL_MEMALIGN_MASK) >> KGSL_MEMALIGN_SHIFT;
@@ -676,30 +677,36 @@
 	 * zeroed and unmaped each individual page, and then we had to turn
 	 * around and call flush_dcache_page() on that page to clear the caches.
 	 * This was killing us for performance. Instead, we found it is much
-	 * faster to allocate the pages without GFP_ZERO, map the entire range,
-	 * memset it, flush the range and then unmap - this results in a factor
-	 * of 4 improvement for speed for large buffers. There is a small
-	 * increase in speed for small buffers, but only on the order of a few
-	 * microseconds at best. The only downside is that there needs to be
-	 * enough temporary space in vmalloc to accomodate the map. This
-	 * shouldn't be a problem, but if it happens, fall back to a much slower
-	 * path
+	 * faster to allocate the pages without GFP_ZERO, map a chunk of the
+	 * range ('step' pages), memset it, flush it and then unmap
+	 * - this results in a factor of 4 improvement for speed for large
+	 * buffers. There is a small decrease in speed for small buffers,
+	 * but only on the order of a few microseconds at best. The 'step'
+	 * size is based on a guess at the amount of free vmalloc space, but
+	 * will scale down if there's not enough free space.
 	 */
+	for (j = 0; j < pcount; j += step) {
+		step = min(step, pcount - j);
-	ptr = vmap(pages, pcount, VM_IOREMAP, page_prot);
+		ptr = vmap(&pages[j], step, VM_IOREMAP, page_prot);
-	if (ptr != NULL) {
-		memset(ptr, 0, memdesc->size);
-		dmac_flush_range(ptr, ptr + memdesc->size);
-		vunmap(ptr);
-	} else {
-		/* Very, very, very slow path */
+		if (ptr != NULL) {
+			memset(ptr, 0, step * PAGE_SIZE);
+			dmac_flush_range(ptr, ptr + step * PAGE_SIZE);
+			vunmap(ptr);
+		} else {
+			int k;
+			/* Very, very, very slow path */
-		for (j = 0; j < pcount; j++) {
-			ptr = kmap_atomic(pages[j]);
-			memset(ptr, 0, PAGE_SIZE);
-			dmac_flush_range(ptr, ptr + PAGE_SIZE);
-			kunmap_atomic(ptr);
+			for (k = j; k < j + step; k++) {
+				ptr = kmap_atomic(pages[k]);
+				memset(ptr, 0, PAGE_SIZE);
+				dmac_flush_range(ptr, ptr + PAGE_SIZE);
+				kunmap_atomic(ptr);
+			}
+			/* scale down the step size to avoid this path */
+			if (step > 1)
+				step >>= 1;
 		}
 	}
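
As a rough sense of scale (illustrative numbers, not taken from the patch): on
a 32-bit ARM build with about 240 MB of vmalloc space and 4 KB pages, the
initial step works out to (240 MB / 8) / 4 KB = 7680 pages, i.e. chunks of
roughly 30 MB. Large allocations therefore need only a handful of vmap()
calls, and each fallback halves the chunk size until a mapping fits or, at
step == 1, the code effectively degrades to the old per-page path.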