Merge changes Ic0dedbad,Ic0dedbad,Ic0dedbad,Ic0dedbad,Ic0dedbad,Ic0dedbad,Ic0dedbad,If110d49f into msm-3.0

* changes:
  msm: kgsl: Put only critical IBs in the snapshot section
  msm: kgsl: Add indirect shader buffers to the snapshot
  msm: kgsl: Freeze GPU memory objects to be dumped with the snapshot
  msm: kgsl: Find a mem_entry by way of a GPU address and a pagetable base
  msm: kgsl: Detach memory objects from a process ahead of destroy time
  msm: kgsl: Remove some uneeded debug output
  msm: kgsl: Do not BUG in GPU recovery if a valid context can't be found
  msm: kgsl: Enable ME split timeout reporting
diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c
index eb017de..986a160 100644
--- a/drivers/gpu/msm/adreno.c
+++ b/drivers/gpu/msm/adreno.c
@@ -973,7 +973,6 @@
 {
 	struct kgsl_memdesc *result = NULL;
 	struct kgsl_mem_entry *entry;
-	struct kgsl_process_private *priv;
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
 	struct adreno_ringbuffer *ringbuffer = &adreno_dev->ringbuffer;
 	struct kgsl_context *context;
@@ -988,21 +987,10 @@
 	if (kgsl_gpuaddr_in_memdesc(&device->memstore, gpuaddr, size))
 		return &device->memstore;
 
-	mutex_lock(&kgsl_driver.process_mutex);
-	list_for_each_entry(priv, &kgsl_driver.process_list, list) {
-		if (!kgsl_mmu_pt_equal(priv->pagetable, pt_base))
-			continue;
-		spin_lock(&priv->mem_lock);
-		entry = kgsl_sharedmem_find_region(priv, gpuaddr, size);
-		if (entry) {
-			result = &entry->memdesc;
-			spin_unlock(&priv->mem_lock);
-			mutex_unlock(&kgsl_driver.process_mutex);
-			return result;
-		}
-		spin_unlock(&priv->mem_lock);
-	}
-	mutex_unlock(&kgsl_driver.process_mutex);
+	entry = kgsl_get_mem_entry(pt_base, gpuaddr, size);
+
+	if (entry)
+		return &entry->memdesc;
 
 	while (1) {
 		struct adreno_context *adreno_context = NULL;
diff --git a/drivers/gpu/msm/adreno_a3xx.c b/drivers/gpu/msm/adreno_a3xx.c
index 24e8efe..aeb48d7 100644
--- a/drivers/gpu/msm/adreno_a3xx.c
+++ b/drivers/gpu/msm/adreno_a3xx.c
@@ -2583,7 +2583,7 @@
 	adreno_regwrite(device, A3XX_RBBM_AHB_CTL0, 0x00000001);
 
 	/* Enable AHB error reporting */
-	adreno_regwrite(device, A3XX_RBBM_AHB_CTL1, 0x86FFFFFF);
+	adreno_regwrite(device, A3XX_RBBM_AHB_CTL1, 0xA6FFFFFF);
 
 	/* Turn on the power counters */
 	adreno_regwrite(device, A3XX_RBBM_RBBM_CTL, 0x00003000);
diff --git a/drivers/gpu/msm/adreno_pm4types.h b/drivers/gpu/msm/adreno_pm4types.h
index 75512d0..420a941 100644
--- a/drivers/gpu/msm/adreno_pm4types.h
+++ b/drivers/gpu/msm/adreno_pm4types.h
@@ -197,6 +197,9 @@
 #define cp_nop_packet(cnt) \
 	 (CP_TYPE3_PKT | (((cnt)-1) << 16) | (CP_NOP << 8))
 
+#define pkt_is_type3(pkt) ((pkt) & CP_TYPE3_PKT)
+#define cp_type3_opcode(pkt) (((pkt) >> 8) & 0xFF)
+#define type3_pkt_size(pkt) ((((pkt) >> 16) & 0x3FFF) + 1)
 
 /* packet headers */
 #define CP_HDR_ME_INIT	cp_type3_packet(CP_ME_INIT, 18)
diff --git a/drivers/gpu/msm/adreno_ringbuffer.c b/drivers/gpu/msm/adreno_ringbuffer.c
index a5c20dc..d6648e2 100644
--- a/drivers/gpu/msm/adreno_ringbuffer.c
+++ b/drivers/gpu/msm/adreno_ringbuffer.c
@@ -641,7 +641,6 @@
 	unsigned int val3;
 	unsigned int copy_rb_contents = 0;
 	unsigned int cur_context;
-	unsigned int j;
 
 	GSL_RB_GET_READPTR(rb, &rb->rptr);
 
@@ -748,8 +747,20 @@
 			kgsl_sharedmem_readl(&rb->buffer_desc, &value, rb_rptr);
 			rb_rptr = adreno_ringbuffer_inc_wrapped(rb_rptr,
 							rb->buffer_desc.size);
-			BUG_ON((copy_rb_contents == 0) &&
-				(value == cur_context));
+
+			/*
+			 * If other context switches were already lost and
+			 * and the current context is the one that is hanging,
+			 * then we cannot recover.  Print an error message
+			 * and leave.
+			 */
+
+			if ((copy_rb_contents == 0) && (value == cur_context)) {
+				KGSL_DRV_ERR(device, "GPU recovery could not "
+					"find the previous context\n");
+				return -EINVAL;
+			}
+
 			/*
 			 * If we were copying the commands and got to this point
 			 * then we need to remove the 3 commands that appear
@@ -780,19 +791,6 @@
 	}
 
 	*rb_size = temp_idx;
-	KGSL_DRV_ERR(device, "Extracted rb contents, size: %x\n", *rb_size);
-	for (temp_idx = 0; temp_idx < *rb_size;) {
-		char str[80];
-		int idx = 0;
-		if ((temp_idx + 8) <= *rb_size)
-			j = 8;
-		else
-			j = *rb_size - temp_idx;
-		for (; j != 0; j--)
-			idx += scnprintf(str + idx, 80 - idx,
-				"%8.8X ", temp_rb_buffer[temp_idx++]);
-		printk(KERN_ALERT "%s", str);
-	}
 	return 0;
 }
 
diff --git a/drivers/gpu/msm/adreno_snapshot.c b/drivers/gpu/msm/adreno_snapshot.c
index 9836043..082df4b 100644
--- a/drivers/gpu/msm/adreno_snapshot.c
+++ b/drivers/gpu/msm/adreno_snapshot.c
@@ -27,6 +27,9 @@
 
 #define SNAPSHOT_OBJ_TYPE_IB 0
 
+/* Keep track of how many bytes are frozen after a snapshot and tell the user */
+static int snapshot_frozen_objsize;
+
 static struct kgsl_snapshot_obj {
 	int type;
 	uint32_t gpuaddr;
@@ -104,6 +107,97 @@
 	return 0;
 }
 
+static void ib_parse_load_state(struct kgsl_device *device, unsigned int *pkt,
+	unsigned int ptbase)
+{
+	unsigned int block, source, type;
+
+	/*
+	 * The object here is to find indirect shaders i.e - shaders loaded from
+	 * GPU memory instead of directly in the command.  These should be added
+	 * to the list of memory objects to dump. So look at the load state
+	 * call and see if 1) the shader block is a shader (block = 4, 5 or 6)
+	 * 2) that the block is indirect (source = 4). If these all match then
+	 * add the memory address to the list.  The size of the object will
+	 * differ depending on the type.  Type 0 (instructions) are 8 dwords per
+	 * unit and type 1 (constants) are 2 dwords per unit.
+	 */
+
+	if (type3_pkt_size(pkt[0]) < 2)
+		return;
+
+	/*
+	 * pkt[1] 18:16 - source
+	 * pkt[1] 21:19 - state block
+	 * pkt[1] 31:22 - size in units
+	 * pkt[2] 0:1 - type
+	 * pkt[2] 31:2 - GPU memory address
+	 */
+
+	block = (pkt[1] >> 19) & 0x07;
+	source = (pkt[1] >> 16) & 0x07;
+	type = pkt[2] & 0x03;
+
+	if ((block == 4 || block == 5 || block == 6) && source == 4) {
+		int unitsize = (type == 0) ? 8 : 2;
+		int ret;
+
+		/* Freeze the GPU buffer containing the shader */
+
+		ret = kgsl_snapshot_get_object(device, ptbase,
+				pkt[2] & 0xFFFFFFFC,
+				(((pkt[1] >> 22) & 0x03FF) * unitsize) << 2,
+				SNAPSHOT_GPU_OBJECT_SHADER);
+		snapshot_frozen_objsize += ret;
+	}
+}
+
+/*
+ * Parse all the type3 opcode packets that may contain important information,
+ * such as additional GPU buffers to grab
+ */
+
+static void ib_parse_type3(struct kgsl_device *device, unsigned int *ptr,
+	unsigned int ptbase)
+{
+	switch (cp_type3_opcode(*ptr)) {
+	case CP_LOAD_STATE:
+		ib_parse_load_state(device, ptr, ptbase);
+		break;
+	}
+}
+
+/* Add an IB as a GPU object, but first, parse it to find more goodies within */
+
+static void ib_add_gpu_object(struct kgsl_device *device, unsigned int ptbase,
+		unsigned int gpuaddr, unsigned int dwords)
+{
+	int i, ret;
+	unsigned int *src = (unsigned int *) adreno_convertaddr(device, ptbase,
+		gpuaddr, dwords << 2);
+
+	if (src == NULL)
+		return;
+
+	for (i = 0; i < dwords; i++) {
+		if (pkt_is_type3(src[i])) {
+			if ((dwords - i) < type3_pkt_size(src[i]) + 1)
+				continue;
+
+			if (adreno_cmd_is_ib(src[i]))
+				ib_add_gpu_object(device, ptbase,
+					src[i + 1], src[i + 2]);
+			else
+				ib_parse_type3(device, &src[i], ptbase);
+		}
+	}
+
+	ret = kgsl_snapshot_get_object(device, ptbase, gpuaddr, dwords << 2,
+		SNAPSHOT_GPU_OBJECT_IB);
+
+	snapshot_frozen_objsize += ret;
+}
+
 /* Snapshot the istore memory */
 static int snapshot_istore(struct kgsl_device *device, void *snapshot,
 	int remain, void *priv)
@@ -137,7 +231,7 @@
 	unsigned int *data = snapshot + sizeof(*header);
 	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
 	struct adreno_ringbuffer *rb = &adreno_dev->ringbuffer;
-	unsigned int rbbase, ptbase, rptr, *rbptr;
+	unsigned int rbbase, ptbase, rptr, *rbptr, ibbase;
 	int start, stop, index;
 	int numitems, size;
 	int parse_ibs = 0, ib_parse_start;
@@ -151,6 +245,13 @@
 	/* Get the current read pointers for the RB */
 	kgsl_regread(device, REG_CP_RB_RPTR, &rptr);
 
+	/*
+	 * Get the address of the last executed IB1 so we can be sure to
+	 * snapshot it
+	 */
+
+	kgsl_regread(device, REG_CP_IB1_BASE, &ibbase);
+
 	/* start the dump at the rptr minus some history */
 	start = (int) rptr - NUM_DWORDS_OF_RINGBUFFER_HISTORY;
 	if (start < 0)
@@ -249,9 +350,19 @@
 		if (index == rptr)
 			parse_ibs = 0;
 
-		if (parse_ibs && adreno_cmd_is_ib(rbptr[index]))
-			push_object(device, SNAPSHOT_OBJ_TYPE_IB, ptbase,
-				rbptr[index + 1], rbptr[index + 2]);
+		if (parse_ibs && adreno_cmd_is_ib(rbptr[index])) {
+			/*
+			 * The IB from CP_IB1_BASE goes into the snapshot, all
+			 * others get marked at GPU objects
+			 */
+			if (rbptr[index + 1] == ibbase)
+				push_object(device, SNAPSHOT_OBJ_TYPE_IB,
+					ptbase, rbptr[index + 1],
+					rbptr[index + 2]);
+			else
+				ib_add_gpu_object(device, ptbase,
+					rbptr[index + 1], rbptr[index + 2]);
+		}
 
 		index = index + 1;
 
@@ -277,7 +388,6 @@
 	return size + sizeof(*header);
 }
 
-/* Snapshot the memory for an indirect buffer */
 static int snapshot_ib(struct kgsl_device *device, void *snapshot,
 	int remain, void *priv)
 {
@@ -299,16 +409,19 @@
 	header->size = obj->dwords;
 
 	/* Write the contents of the ib */
-	for (i = 0; i < obj->dwords; i++) {
+	for (i = 0; i < obj->dwords; i++, src++, dst++) {
 		*dst = *src;
-		/* If another IB is discovered, then push it on the list too */
 
-		if (adreno_cmd_is_ib(*src))
-			push_object(device, SNAPSHOT_OBJ_TYPE_IB, obj->ptbase,
-				*(src + 1), *(src + 2));
+		if (pkt_is_type3(*src)) {
+			if ((obj->dwords - i) < type3_pkt_size(*src) + 1)
+				continue;
 
-		src++;
-		dst++;
+			if (adreno_cmd_is_ib(*src))
+				push_object(device, SNAPSHOT_OBJ_TYPE_IB,
+					obj->ptbase, src[1], src[2]);
+			else
+				ib_parse_type3(device, src, obj->ptbase);
+		}
 	}
 
 	return (obj->dwords << 2) + sizeof(*header);
@@ -354,6 +467,8 @@
 	/* Reset the list of objects */
 	objbufptr = 0;
 
+	snapshot_frozen_objsize = 0;
+
 	/* Get the physical address of the MMU pagetable */
 	ptbase = kgsl_mmu_get_current_ptbase(device);
 
@@ -425,5 +540,9 @@
 		snapshot = adreno_dev->gpudev->snapshot(adreno_dev, snapshot,
 			remain, hang);
 
+	if (snapshot_frozen_objsize)
+		KGSL_DRV_ERR(device, "GPU snapshot froze %dKb of GPU buffers\n",
+			snapshot_frozen_objsize / 1024);
+
 	return snapshot;
 }
diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c
index 50a6fab..5464bbb 100644
--- a/drivers/gpu/msm/kgsl.c
+++ b/drivers/gpu/msm/kgsl.c
@@ -136,6 +136,39 @@
 	}
 }
 
+/* kgsl_get_mem_entry - get the mem_entry structure for the specified object
+ * @ptbase - the pagetable base of the object
+ * @gpuaddr - the GPU address of the object
+ * @size - Size of the region to search
+ */
+
+struct kgsl_mem_entry *kgsl_get_mem_entry(unsigned int ptbase,
+	unsigned int gpuaddr, unsigned int size)
+{
+	struct kgsl_process_private *priv;
+	struct kgsl_mem_entry *entry;
+
+	mutex_lock(&kgsl_driver.process_mutex);
+
+	list_for_each_entry(priv, &kgsl_driver.process_list, list) {
+		if (!kgsl_mmu_pt_equal(priv->pagetable, ptbase))
+			continue;
+		spin_lock(&priv->mem_lock);
+		entry = kgsl_sharedmem_find_region(priv, gpuaddr, size);
+
+		if (entry) {
+			spin_unlock(&priv->mem_lock);
+			mutex_unlock(&kgsl_driver.process_mutex);
+			return entry;
+		}
+		spin_unlock(&priv->mem_lock);
+	}
+	mutex_unlock(&kgsl_driver.process_mutex);
+
+	return NULL;
+}
+EXPORT_SYMBOL(kgsl_get_mem_entry);
+
 static inline struct kgsl_mem_entry *
 kgsl_mem_entry_create(void)
 {
@@ -156,8 +189,6 @@
 						    struct kgsl_mem_entry,
 						    refcount);
 
-	entry->priv->stats[entry->memtype].cur -= entry->memdesc.size;
-
 	if (entry->memtype != KGSL_MEM_ENTRY_KERNEL)
 		kgsl_driver.stats.mapped -= entry->memdesc.size;
 
@@ -200,6 +231,21 @@
 	entry->priv = process;
 }
 
+/* Detach a memory entry from a process and unmap it from the MMU */
+
+static void kgsl_mem_entry_detach_process(struct kgsl_mem_entry *entry)
+{
+	if (entry == NULL)
+		return;
+
+	entry->priv->stats[entry->memtype].cur -= entry->memdesc.size;
+	entry->priv = NULL;
+
+	kgsl_mmu_unmap(entry->memdesc.pagetable, &entry->memdesc);
+
+	kgsl_mem_entry_put(entry);
+}
+
 /* Allocate a new context id */
 
 static struct kgsl_context *
@@ -593,7 +639,7 @@
 
 	list_for_each_entry_safe(entry, entry_tmp, &private->mem_list, list) {
 		list_del(&entry->list);
-		kgsl_mem_entry_put(entry);
+		kgsl_mem_entry_detach_process(entry);
 	}
 
 	kgsl_mmu_putpagetable(private->pagetable);
@@ -1016,7 +1062,7 @@
 	list_del(&entry->list);
 	spin_unlock(&entry->priv->mem_lock);
 	trace_kgsl_mem_timestamp_free(entry, timestamp);
-	kgsl_mem_entry_put(entry);
+	kgsl_mem_entry_detach_process(entry);
 }
 
 static long kgsl_ioctl_cmdstream_freememontimestamp(struct kgsl_device_private
@@ -1117,7 +1163,7 @@
 
 	if (entry) {
 		trace_kgsl_mem_free(entry);
-		kgsl_mem_entry_put(entry);
+		kgsl_mem_entry_detach_process(entry);
 	} else {
 		KGSL_CORE_ERR("invalid gpuaddr %08x\n", param->gpuaddr);
 		result = -EINVAL;
diff --git a/drivers/gpu/msm/kgsl.h b/drivers/gpu/msm/kgsl.h
index b2fe095..06f78fc 100644
--- a/drivers/gpu/msm/kgsl.h
+++ b/drivers/gpu/msm/kgsl.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2008-2011, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2008-2012, Code Aurora Forum. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -126,10 +126,15 @@
 #define KGSL_MEM_ENTRY_ION    4
 #define KGSL_MEM_ENTRY_MAX    5
 
+/* List of flags */
+
+#define KGSL_MEM_ENTRY_FROZEN (1 << 0)
+
 struct kgsl_mem_entry {
 	struct kref refcount;
 	struct kgsl_memdesc memdesc;
 	int memtype;
+	int flags;
 	void *priv_data;
 	struct list_head list;
 	uint32_t free_timestamp;
@@ -145,6 +150,10 @@
 #endif
 
 void kgsl_mem_entry_destroy(struct kref *kref);
+
+struct kgsl_mem_entry *kgsl_get_mem_entry(unsigned int ptbase,
+		unsigned int gpuaddr, unsigned int size);
+
 struct kgsl_mem_entry *kgsl_sharedmem_find_region(
 	struct kgsl_process_private *private, unsigned int gpuaddr,
 	size_t size);
diff --git a/drivers/gpu/msm/kgsl_device.h b/drivers/gpu/msm/kgsl_device.h
index d7a25a1..2eacf22 100644
--- a/drivers/gpu/msm/kgsl_device.h
+++ b/drivers/gpu/msm/kgsl_device.h
@@ -176,6 +176,12 @@
 				   losing the output on multiple hangs  */
 	struct kobject snapshot_kobj;
 
+	/*
+	 * List of GPU buffers that have been frozen in memory until they can be
+	 * dumped
+	 */
+	struct list_head snapshot_obj_list;
+
 	/* Logging levels */
 	int cmd_log;
 	int ctxt_log;
diff --git a/drivers/gpu/msm/kgsl_mmu.c b/drivers/gpu/msm/kgsl_mmu.c
index 671479e..8eebb77 100644
--- a/drivers/gpu/msm/kgsl_mmu.c
+++ b/drivers/gpu/msm/kgsl_mmu.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2002,2007-2011, Code Aurora Forum. All rights reserved.
+/* Copyright (c) 2002,2007-2012, Code Aurora Forum. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -605,6 +605,7 @@
 			memdesc->gpuaddr & KGSL_MMU_ALIGN_MASK,
 			memdesc->size);
 
+	memdesc->gpuaddr = 0;
 	return 0;
 }
 EXPORT_SYMBOL(kgsl_mmu_unmap);
diff --git a/drivers/gpu/msm/kgsl_snapshot.c b/drivers/gpu/msm/kgsl_snapshot.c
index 93fdc08..cfcb2ea 100644
--- a/drivers/gpu/msm/kgsl_snapshot.c
+++ b/drivers/gpu/msm/kgsl_snapshot.c
@@ -22,6 +22,17 @@
 #include "kgsl_sharedmem.h"
 #include "kgsl_snapshot.h"
 
+/* Placeholder for the list of memory objects frozen after a hang */
+
+struct kgsl_snapshot_object {
+	unsigned int gpuaddr;
+	unsigned int ptbase;
+	unsigned int size;
+	int type;
+	struct kgsl_mem_entry *entry;
+	struct list_head node;
+};
+
 /* idr_for_each function to count the number of contexts */
 
 static int snapshot_context_count(int id, void *ptr, void *data)
@@ -164,6 +175,199 @@
 	return (iregs->count * 4) + sizeof(*header);
 }
 
+#define GPU_OBJ_HEADER_SZ \
+	(sizeof(struct kgsl_snapshot_section_header) + \
+	 sizeof(struct kgsl_snapshot_gpu_object))
+
+#define GPU_OBJ_SECTION_SIZE(_o) \
+	(GPU_OBJ_HEADER_SZ + ((_o)->size))
+
+static int kgsl_snapshot_dump_object(struct kgsl_device *device,
+		struct kgsl_snapshot_object *obj, void *buf,
+		unsigned int off, unsigned int count)
+{
+	unsigned char headers[GPU_OBJ_HEADER_SZ];
+	struct kgsl_snapshot_section_header *sect =
+		(struct kgsl_snapshot_section_header *) headers;
+	struct kgsl_snapshot_gpu_object *header =
+		(struct kgsl_snapshot_gpu_object *) (headers + sizeof(*sect));
+	int ret = 0;
+
+	/* Construct a local copy of the headers */
+
+	sect->magic = SNAPSHOT_SECTION_MAGIC;
+	sect->id = KGSL_SNAPSHOT_SECTION_GPU_OBJECT;
+	sect->size = GPU_OBJ_SECTION_SIZE(obj);
+
+	header->type = obj->type;
+
+	/* Header size is in dwords, object size is in bytes */
+	header->size = obj->size >> 2;
+	header->gpuaddr = obj->gpuaddr;
+	header->ptbase = obj->ptbase;
+
+	/* Copy out any part of the header block that is needed */
+
+	if (off < GPU_OBJ_HEADER_SZ) {
+		int size = count < GPU_OBJ_HEADER_SZ - off ?
+			count : GPU_OBJ_HEADER_SZ - off;
+
+		memcpy(buf, headers + off, size);
+
+		count -= size;
+		ret += size;
+	}
+
+	/* Now copy whatever part of the data is needed */
+
+	if (off < (GPU_OBJ_HEADER_SZ + obj->size)) {
+		int offset;
+		int size = count < obj->size ? count : obj->size;
+
+		/*
+		 * If the desired gpuaddr isn't at the beginning of the region,
+		 * then offset the source pointer
+		 */
+
+		offset = obj->gpuaddr - obj->entry->memdesc.gpuaddr;
+
+		/*
+		 * Then  adjust it to account for the offset for the output
+		 * buffer.
+		 */
+
+		if (off > GPU_OBJ_HEADER_SZ) {
+			int loff = (off - GPU_OBJ_HEADER_SZ);
+
+			/* Adjust the size so we don't walk off the end */
+
+			if ((loff + size) > obj->size)
+				size = obj->size - loff;
+
+			offset += loff;
+		}
+
+		memcpy(buf + ret, obj->entry->memdesc.hostptr + offset, size);
+		ret += size;
+	}
+
+	return ret;
+}
+
+static void kgsl_snapshot_put_object(struct kgsl_device *device,
+	struct kgsl_snapshot_object *obj)
+{
+	list_del(&obj->node);
+
+	obj->entry->flags &= ~KGSL_MEM_ENTRY_FROZEN;
+	kgsl_mem_entry_put(obj->entry);
+
+	kfree(obj);
+}
+
+/* kgsl_snapshot_get_object - Mark a GPU buffer to be frozen
+ * @device - the device that is being snapshotted
+ * @ptbase - the pagetable base of the object to freeze
+ * @gpuaddr - The gpu address of the object to freeze
+ * @size - the size of the object (may not always be the size of the region)
+ * @type - the type of object being saved (shader, vbo, etc)
+ *
+ * Mark and freeze a GPU buffer object.  This will prevent it from being
+ * freed until it can be copied out as part of the snapshot dump.  Returns the
+ * size of the object being frozen
+ */
+
+int kgsl_snapshot_get_object(struct kgsl_device *device, unsigned int ptbase,
+	unsigned int gpuaddr, unsigned int size, unsigned int type)
+{
+	struct kgsl_mem_entry *entry;
+	struct kgsl_snapshot_object *obj;
+	int offset;
+
+	entry = kgsl_get_mem_entry(ptbase, gpuaddr, size);
+
+	if (entry == NULL) {
+		KGSL_DRV_ERR(device, "Unable to find GPU buffer %8.8X\n",
+				gpuaddr);
+		return 0;
+	}
+
+	/* We can't freeze external memory, because we don't own it */
+	if (entry->memtype != KGSL_MEM_ENTRY_KERNEL) {
+		KGSL_DRV_ERR(device,
+			"Only internal GPU buffers can be frozen\n");
+		return 0;
+	}
+
+	/*
+	 * size indicates the number of bytes in the region to save. This might
+	 * not always be the entire size of the region because some buffers are
+	 * sub-allocated from a larger region.  However, if size 0 was passed
+	 * thats a flag that the caller wants to capture the entire buffer
+	 */
+
+	if (size == 0) {
+		size = entry->memdesc.size;
+		offset = 0;
+
+		/* Adjust the gpuaddr to the start of the object */
+		gpuaddr = entry->memdesc.gpuaddr;
+	} else {
+		offset = gpuaddr - entry->memdesc.gpuaddr;
+	}
+
+	if (size + offset > entry->memdesc.size) {
+		KGSL_DRV_ERR(device, "Invalid size for GPU buffer %8.8X\n",
+				gpuaddr);
+		return 0;
+	}
+
+	/* If the buffer is already on the list, skip it */
+	list_for_each_entry(obj, &device->snapshot_obj_list, node) {
+		if (obj->gpuaddr == gpuaddr && obj->ptbase == ptbase) {
+			/* If the size is different, use the new size */
+			if (obj->size != size)
+				obj->size = size;
+
+			return 0;
+		}
+	}
+
+	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+
+	if (obj == NULL) {
+		KGSL_DRV_ERR(device, "Unable to allocate memory\n");
+		return 0;
+	}
+
+	/* Ref count the mem entry */
+	kgsl_mem_entry_get(entry);
+
+	obj->type = type;
+	obj->entry = entry;
+	obj->gpuaddr = gpuaddr;
+	obj->ptbase = ptbase;
+	obj->size = size;
+
+	list_add(&obj->node, &device->snapshot_obj_list);
+
+	/*
+	 * Return the size of the entire mem entry that was frozen - this gets
+	 * used for tracking how much memory is frozen for a hang.  Also, mark
+	 * the memory entry as frozen. If the entry was already marked as
+	 * frozen, then another buffer already got to it.  In that case, return
+	 * 0 so it doesn't get counted twice
+	 */
+
+	if (entry->flags & KGSL_MEM_ENTRY_FROZEN)
+		return 0;
+
+	entry->flags |= KGSL_MEM_ENTRY_FROZEN;
+
+	return entry->memdesc.size;
+}
+EXPORT_SYMBOL(kgsl_snapshot_get_object);
+
 /*
  * kgsl_snapshot_dump_regs - helper function to dump device registers
  * @device - the device to dump registers from
@@ -288,10 +492,6 @@
 		snapshot = device->ftbl->snapshot(device, snapshot, &remain,
 			hang);
 
-	/* Add the empty end section to let the parser know we are done */
-	snapshot = kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_END,
-		snapshot, &remain, NULL, NULL);
-
 	device->snapshot_timestamp = get_seconds();
 	device->snapshot_size = (int) (snapshot - device->snapshot);
 
@@ -326,6 +526,8 @@
 	size_t count)
 {
 	struct kgsl_device *device = kobj_to_device(kobj);
+	struct kgsl_snapshot_object *obj, *tmp;
+	unsigned int size, src, dst = 0;
 
 	if (device == NULL)
 		return 0;
@@ -337,25 +539,80 @@
 	/* Get the mutex to keep things from changing while we are dumping */
 	mutex_lock(&device->mutex);
 
-	/*
-	 * Release the freeze on the snapshot the first time the buffer is read
-	 */
+	if (off < device->snapshot_size) {
+		size = count < (device->snapshot_size - off) ?
+			count : device->snapshot_size - off;
+
+		memcpy(buf, device->snapshot + off, size);
+
+		count -= size;
+		dst += size;
+	}
+
+	if (count == 0)
+		goto done;
+
+	src = device->snapshot_size;
+
+	list_for_each_entry(obj, &device->snapshot_obj_list, node) {
+
+		int objsize = GPU_OBJ_SECTION_SIZE(obj);
+		int offset;
+
+		/* If the offset is beyond this object, then move on */
+
+		if (off >= (src + objsize)) {
+			src += objsize;
+			continue;
+		}
+
+		/* Adjust the offset to be relative to the object */
+		offset = (off >= src) ? (off - src) : 0;
+
+		size = kgsl_snapshot_dump_object(device, obj, buf + dst,
+			offset, count);
+
+		count -= size;
+		dst += size;
+
+		if (count == 0)
+			goto done;
+
+		/* Move on to the next object - update src accordingly */
+		src += objsize;
+	}
+
+	/* Add the end section */
+
+	if (off < (src + sizeof(struct kgsl_snapshot_section_header))) {
+		if (count >= sizeof(struct kgsl_snapshot_section_header)) {
+			struct kgsl_snapshot_section_header *head =
+				(void *) (buf + dst);
+
+			head->magic = SNAPSHOT_SECTION_MAGIC;
+			head->id = KGSL_SNAPSHOT_SECTION_END;
+			head->size = sizeof(*head);
+
+			dst += sizeof(*head);
+		} else {
+			goto done;
+		}
+	}
+
+	/* Release the buffers and unfreeze the snapshot */
+
+	list_for_each_entry_safe(obj, tmp, &device->snapshot_obj_list, node)
+		kgsl_snapshot_put_object(device, obj);
+
+	if (device->snapshot_frozen)
+		KGSL_DRV_ERR(device, "Snapshot objects released\n");
 
 	device->snapshot_frozen = 0;
 
-	if (off >= device->snapshot_size) {
-		count = 0;
-		goto exit;
-	}
-
-	if (off + count > device->snapshot_size)
-		count = device->snapshot_size - off;
-
-	memcpy(buf, device->snapshot + off, count);
-
-exit:
+done:
 	mutex_unlock(&device->mutex);
-	return count;
+
+	return dst;
 }
 
 /* Show the timestamp of the last collected snapshot */
@@ -459,6 +716,8 @@
 	device->snapshot_maxsize = KGSL_SNAPSHOT_MEMSIZE;
 	device->snapshot_timestamp = 0;
 
+	INIT_LIST_HEAD(&device->snapshot_obj_list);
+
 	ret = kobject_init_and_add(&device->snapshot_kobj, &ktype_snapshot,
 		&device->dev->kobj, "snapshot");
 	if (ret)
diff --git a/drivers/gpu/msm/kgsl_snapshot.h b/drivers/gpu/msm/kgsl_snapshot.h
index 3b72b0f..bd5be74 100644
--- a/drivers/gpu/msm/kgsl_snapshot.h
+++ b/drivers/gpu/msm/kgsl_snapshot.h
@@ -48,6 +48,8 @@
 #define KGSL_SNAPSHOT_SECTION_ISTORE       0x0801
 #define KGSL_SNAPSHOT_SECTION_DEBUG        0x0901
 #define KGSL_SNAPSHOT_SECTION_DEBUGBUS     0x0A01
+#define KGSL_SNAPSHOT_SECTION_GPU_OBJECT   0x0B01
+
 #define KGSL_SNAPSHOT_SECTION_END          0xFFFF
 
 /* OS sub-section header */
@@ -149,6 +151,16 @@
 	int count; /* Number of dwords in the dump */
 } __packed;
 
+#define SNAPSHOT_GPU_OBJECT_SHADER 1
+#define SNAPSHOT_GPU_OBJECT_IB     2
+
+struct kgsl_snapshot_gpu_object {
+	int type;      /* Type of GPU object */
+	__u32 gpuaddr; /* GPU address of the the object */
+	__u32 ptbase;  /* Base for the pagetable the GPU address is valid in */
+	int size;    /* Size of the object (in dwords) */
+};
+
 #ifdef __KERNEL__
 
 /* Allocate 512K for each device snapshot */
@@ -272,6 +284,9 @@
 	void *snapshot, int *remain, unsigned int index,
 	unsigned int data, unsigned int start, unsigned int count);
 
+/* Freeze a GPU buffer so it can be dumped in the snapshot */
+int kgsl_snapshot_get_object(struct kgsl_device *device, unsigned int ptbase,
+	unsigned int gpuaddr, unsigned int size, unsigned int type);
 
 #endif
 #endif