msm: kgsl: Add Qualcomm GPU driver

Snapshot of the Qualcomm Adreno GPU driver (KGSL) as of msm-4.4
commit 97e6e94b40eb ("Promotion of kernel.lnx.4.4-161210.1.").

Change-Id: If5a8141cf7224d9661aba70576356a1ed2a99996
Signed-off-by: Shrenuj Bansal <shrenujb@codeaurora.org>
Signed-off-by: Lynus Vaz <lvaz@codeaurora.org>
diff --git a/drivers/gpu/Makefile b/drivers/gpu/Makefile
index e9ed439..f4ebf77 100644
--- a/drivers/gpu/Makefile
+++ b/drivers/gpu/Makefile
@@ -4,3 +4,4 @@
 obj-$(CONFIG_TEGRA_HOST1X)	+= host1x/
 obj-y			+= drm/ vga/
 obj-$(CONFIG_IMX_IPUV3_CORE)	+= ipu-v3/
+obj-$(CONFIG_QCOM_KGSL) += msm/
diff --git a/drivers/gpu/msm/Kconfig b/drivers/gpu/msm/Kconfig
new file mode 100644
index 0000000..35d8310
--- /dev/null
+++ b/drivers/gpu/msm/Kconfig
@@ -0,0 +1,27 @@
+config QCOM_KGSL
+	tristate "Qualcomm Technologies, Inc. 3D Graphics driver"
+	default n
+	depends on ARCH_QCOM
+	select GENERIC_ALLOCATOR
+	select FW_LOADER
+	select PM_DEVFREQ
+	select DEVFREQ_GOV_SIMPLE_ONDEMAND
+	select DEVFREQ_GOV_PERFORMANCE
+	select DEVFREQ_GOV_QCOM_ADRENO_TZ
+	select DEVFREQ_GOV_QCOM_GPUBW_MON
+	select ONESHOT_SYNC if SYNC
+	---help---
+	  3D graphics driver for the Adreno family of GPUs from QTI.
+	  Required to use hardware-accelerated OpenGL, compute and Vulkan
+	  on QTI targets. The driver provides power management, memory
+	  management and scheduling for the Adreno GPUs.
+
+config QCOM_ADRENO_DEFAULT_GOVERNOR
+	string "devfreq governor for the adreno core"
+	default "msm-adreno-tz" if DEVFREQ_GOV_QCOM_ADRENO_TZ
+	default "simple_ondemand"
+	depends on QCOM_KGSL
+
+config QCOM_KGSL_IOMMU
+	bool
+	default y if QCOM_KGSL && (MSM_IOMMU || ARM_SMMU)
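+
+# Example (illustrative only, not part of this snapshot): a target defconfig
+# would typically enable the driver with CONFIG_QCOM_KGSL=y, or =m to build
+# the msm_kgsl_core and msm_adreno modules listed in the Makefile below.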
diff --git a/drivers/gpu/msm/Makefile b/drivers/gpu/msm/Makefile
new file mode 100644
index 0000000..0634776
--- /dev/null
+++ b/drivers/gpu/msm/Makefile
@@ -0,0 +1,50 @@
+ccflags-y := -Idrivers/staging/android
+
+msm_kgsl_core-y = \
+	kgsl.o \
+	kgsl_trace.o \
+	kgsl_drawobj.o \
+	kgsl_ioctl.o \
+	kgsl_sharedmem.o \
+	kgsl_pwrctrl.o \
+	kgsl_pwrscale.o \
+	kgsl_mmu.o \
+	kgsl_snapshot.o \
+	kgsl_events.o \
+	kgsl_pool.o
+
+msm_kgsl_core-$(CONFIG_QCOM_KGSL_IOMMU) += kgsl_iommu.o
+msm_kgsl_core-$(CONFIG_DEBUG_FS) += kgsl_debugfs.o
+msm_kgsl_core-$(CONFIG_SYNC) += kgsl_sync.o
+msm_kgsl_core-$(CONFIG_COMPAT) += kgsl_compat.o
+
+msm_adreno-y += \
+	adreno_ioctl.o \
+	adreno_ringbuffer.o \
+	adreno_drawctxt.o \
+	adreno_dispatch.o \
+	adreno_snapshot.o \
+	adreno_coresight.o \
+	adreno_trace.o \
+	adreno_a3xx.o \
+	adreno_a4xx.o \
+	adreno_a5xx.o \
+	adreno_a3xx_snapshot.o \
+	adreno_a4xx_snapshot.o \
+	adreno_a5xx_snapshot.o \
+	adreno_a4xx_preempt.o \
+	adreno_a5xx_preempt.o \
+	adreno_sysfs.o \
+	adreno.o \
+	adreno_cp_parser.o \
+	adreno_perfcounter.o
+
+msm_adreno-$(CONFIG_QCOM_KGSL_IOMMU) += adreno_iommu.o
+msm_adreno-$(CONFIG_DEBUG_FS) += adreno_debugfs.o adreno_profile.o
+msm_adreno-$(CONFIG_COMPAT) += adreno_compat.o
+
+msm_kgsl_core-objs = $(msm_kgsl_core-y)
+msm_adreno-objs = $(msm_adreno-y)
+
+obj-$(CONFIG_QCOM_KGSL) += msm_kgsl_core.o
+obj-$(CONFIG_QCOM_KGSL) += msm_adreno.o
diff --git a/drivers/gpu/msm/a3xx_reg.h b/drivers/gpu/msm/a3xx_reg.h
new file mode 100644
index 0000000..fc17470
--- /dev/null
+++ b/drivers/gpu/msm/a3xx_reg.h
@@ -0,0 +1,575 @@
+/* Copyright (c) 2012-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _A300_REG_H
+#define _A300_REG_H
+
+/* Interrupt bit positions within RBBM_INT_0 */
+
+#define A3XX_INT_RBBM_GPU_IDLE 0
+#define A3XX_INT_RBBM_AHB_ERROR 1
+#define A3XX_INT_RBBM_REG_TIMEOUT 2
+#define A3XX_INT_RBBM_ME_MS_TIMEOUT 3
+#define A3XX_INT_RBBM_PFP_MS_TIMEOUT 4
+#define A3XX_INT_RBBM_ATB_BUS_OVERFLOW 5
+#define A3XX_INT_VFD_ERROR 6
+#define A3XX_INT_CP_SW_INT 7
+#define A3XX_INT_CP_T0_PACKET_IN_IB 8
+#define A3XX_INT_CP_OPCODE_ERROR 9
+#define A3XX_INT_CP_RESERVED_BIT_ERROR 10
+#define A3XX_INT_CP_HW_FAULT 11
+#define A3XX_INT_CP_DMA 12
+#define A3XX_INT_CP_IB2_INT 13
+#define A3XX_INT_CP_IB1_INT 14
+#define A3XX_INT_CP_RB_INT 15
+#define A3XX_INT_CP_REG_PROTECT_FAULT 16
+#define A3XX_INT_CP_RB_DONE_TS 17
+#define A3XX_INT_CP_VS_DONE_TS 18
+#define A3XX_INT_CP_PS_DONE_TS 19
+#define A3XX_INT_CACHE_FLUSH_TS 20
+#define A3XX_INT_CP_AHB_ERROR_HALT 21
+#define A3XX_INT_MISC_HANG_DETECT 24
+#define A3XX_INT_UCHE_OOB_ACCESS 25
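+
+/*
+ * Illustrative sketch (not part of this snapshot): the interrupt handling
+ * code combines these bit positions into a mask that is written to
+ * A3XX_RBBM_INT_0_MASK, along the lines of:
+ *
+ *	unsigned int irq_mask = (1 << A3XX_INT_RBBM_AHB_ERROR) |
+ *				(1 << A3XX_INT_CP_HW_FAULT) |
+ *				(1 << A3XX_INT_CACHE_FLUSH_TS);
+ */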
+
+/* CP_EVENT_WRITE events */
+#define CACHE_FLUSH_TS 4
+
+/* Register definitions */
+
+#define A3XX_RBBM_CLOCK_CTL 0x010
+#define A3XX_RBBM_SP_HYST_CNT 0x012
+#define A3XX_RBBM_SW_RESET_CMD 0x018
+#define A3XX_RBBM_AHB_CTL0 0x020
+#define A3XX_RBBM_AHB_CTL1 0x021
+#define A3XX_RBBM_AHB_CMD 0x022
+#define A3XX_RBBM_AHB_ERROR_STATUS 0x027
+#define A3XX_RBBM_GPR0_CTL 0x02E
+/* This is the same register as on A2XX, just in a different place */
+#define A3XX_RBBM_STATUS 0x030
+#define A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL 0x33
+#define A3XX_RBBM_INTERFACE_HANG_INT_CTL 0x50
+#define A3XX_RBBM_INT_CLEAR_CMD 0x061
+#define A3XX_RBBM_INT_0_MASK 0x063
+#define A3XX_RBBM_INT_0_STATUS 0x064
+#define A3XX_RBBM_PERFCTR_CTL 0x80
+#define A3XX_RBBM_PERFCTR_LOAD_CMD0 0x81
+#define A3XX_RBBM_PERFCTR_LOAD_CMD1 0x82
+#define A3XX_RBBM_PERFCTR_LOAD_VALUE_LO 0x84
+#define A3XX_RBBM_PERFCTR_LOAD_VALUE_HI 0x85
+#define A3XX_RBBM_PERFCOUNTER0_SELECT 0x86
+#define A3XX_RBBM_PERFCOUNTER1_SELECT 0x87
+#define A3XX_RBBM_GPU_BUSY_MASKED 0x88
+#define A3XX_RBBM_PERFCTR_CP_0_LO 0x90
+#define A3XX_RBBM_PERFCTR_CP_0_HI 0x91
+#define A3XX_RBBM_PERFCTR_RBBM_0_LO 0x92
+#define A3XX_RBBM_PERFCTR_RBBM_0_HI 0x93
+#define A3XX_RBBM_PERFCTR_RBBM_1_LO 0x94
+#define A3XX_RBBM_PERFCTR_RBBM_1_HI 0x95
+#define A3XX_RBBM_PERFCTR_PC_0_LO 0x96
+#define A3XX_RBBM_PERFCTR_PC_0_HI 0x97
+#define A3XX_RBBM_PERFCTR_PC_1_LO 0x98
+#define A3XX_RBBM_PERFCTR_PC_1_HI 0x99
+#define A3XX_RBBM_PERFCTR_PC_2_LO 0x9A
+#define A3XX_RBBM_PERFCTR_PC_2_HI 0x9B
+#define A3XX_RBBM_PERFCTR_PC_3_LO 0x9C
+#define A3XX_RBBM_PERFCTR_PC_3_HI 0x9D
+#define A3XX_RBBM_PERFCTR_VFD_0_LO 0x9E
+#define A3XX_RBBM_PERFCTR_VFD_0_HI 0x9F
+#define A3XX_RBBM_PERFCTR_VFD_1_LO 0xA0
+#define A3XX_RBBM_PERFCTR_VFD_1_HI 0xA1
+#define A3XX_RBBM_PERFCTR_HLSQ_0_LO 0xA2
+#define A3XX_RBBM_PERFCTR_HLSQ_0_HI 0xA3
+#define A3XX_RBBM_PERFCTR_HLSQ_1_LO 0xA4
+#define A3XX_RBBM_PERFCTR_HLSQ_1_HI 0xA5
+#define A3XX_RBBM_PERFCTR_HLSQ_2_LO 0xA6
+#define A3XX_RBBM_PERFCTR_HLSQ_2_HI 0xA7
+#define A3XX_RBBM_PERFCTR_HLSQ_3_LO 0xA8
+#define A3XX_RBBM_PERFCTR_HLSQ_3_HI 0xA9
+#define A3XX_RBBM_PERFCTR_HLSQ_4_LO 0xAA
+#define A3XX_RBBM_PERFCTR_HLSQ_4_HI 0xAB
+#define A3XX_RBBM_PERFCTR_HLSQ_5_LO 0xAC
+#define A3XX_RBBM_PERFCTR_HLSQ_5_HI 0xAD
+#define A3XX_RBBM_PERFCTR_VPC_0_LO 0xAE
+#define A3XX_RBBM_PERFCTR_VPC_0_HI 0xAF
+#define A3XX_RBBM_PERFCTR_VPC_1_LO 0xB0
+#define A3XX_RBBM_PERFCTR_VPC_1_HI 0xB1
+#define A3XX_RBBM_PERFCTR_TSE_0_LO 0xB2
+#define A3XX_RBBM_PERFCTR_TSE_0_HI 0xB3
+#define A3XX_RBBM_PERFCTR_TSE_1_LO 0xB4
+#define A3XX_RBBM_PERFCTR_TSE_1_HI 0xB5
+#define A3XX_RBBM_PERFCTR_RAS_0_LO 0xB6
+#define A3XX_RBBM_PERFCTR_RAS_0_HI 0xB7
+#define A3XX_RBBM_PERFCTR_RAS_1_LO 0xB8
+#define A3XX_RBBM_PERFCTR_RAS_1_HI 0xB9
+#define A3XX_RBBM_PERFCTR_UCHE_0_LO 0xBA
+#define A3XX_RBBM_PERFCTR_UCHE_0_HI 0xBB
+#define A3XX_RBBM_PERFCTR_UCHE_1_LO 0xBC
+#define A3XX_RBBM_PERFCTR_UCHE_1_HI 0xBD
+#define A3XX_RBBM_PERFCTR_UCHE_2_LO 0xBE
+#define A3XX_RBBM_PERFCTR_UCHE_2_HI 0xBF
+#define A3XX_RBBM_PERFCTR_UCHE_3_LO 0xC0
+#define A3XX_RBBM_PERFCTR_UCHE_3_HI 0xC1
+#define A3XX_RBBM_PERFCTR_UCHE_4_LO 0xC2
+#define A3XX_RBBM_PERFCTR_UCHE_4_HI 0xC3
+#define A3XX_RBBM_PERFCTR_UCHE_5_LO 0xC4
+#define A3XX_RBBM_PERFCTR_UCHE_5_HI 0xC5
+#define A3XX_RBBM_PERFCTR_TP_0_LO 0xC6
+#define A3XX_RBBM_PERFCTR_TP_0_HI 0xC7
+#define A3XX_RBBM_PERFCTR_TP_1_LO 0xC8
+#define A3XX_RBBM_PERFCTR_TP_1_HI 0xC9
+#define A3XX_RBBM_PERFCTR_TP_2_LO 0xCA
+#define A3XX_RBBM_PERFCTR_TP_2_HI 0xCB
+#define A3XX_RBBM_PERFCTR_TP_3_LO 0xCC
+#define A3XX_RBBM_PERFCTR_TP_3_HI 0xCD
+#define A3XX_RBBM_PERFCTR_TP_4_LO 0xCE
+#define A3XX_RBBM_PERFCTR_TP_4_HI 0xCF
+#define A3XX_RBBM_PERFCTR_TP_5_LO 0xD0
+#define A3XX_RBBM_PERFCTR_TP_5_HI 0xD1
+#define A3XX_RBBM_PERFCTR_SP_0_LO 0xD2
+#define A3XX_RBBM_PERFCTR_SP_0_HI 0xD3
+#define A3XX_RBBM_PERFCTR_SP_1_LO 0xD4
+#define A3XX_RBBM_PERFCTR_SP_1_HI 0xD5
+#define A3XX_RBBM_PERFCTR_SP_2_LO 0xD6
+#define A3XX_RBBM_PERFCTR_SP_2_HI 0xD7
+#define A3XX_RBBM_PERFCTR_SP_3_LO 0xD8
+#define A3XX_RBBM_PERFCTR_SP_3_HI 0xD9
+#define A3XX_RBBM_PERFCTR_SP_4_LO 0xDA
+#define A3XX_RBBM_PERFCTR_SP_4_HI 0xDB
+#define A3XX_RBBM_PERFCTR_SP_5_LO 0xDC
+#define A3XX_RBBM_PERFCTR_SP_5_HI 0xDD
+#define A3XX_RBBM_PERFCTR_SP_6_LO 0xDE
+#define A3XX_RBBM_PERFCTR_SP_6_HI 0xDF
+#define A3XX_RBBM_PERFCTR_SP_7_LO 0xE0
+#define A3XX_RBBM_PERFCTR_SP_7_HI 0xE1
+#define A3XX_RBBM_PERFCTR_RB_0_LO 0xE2
+#define A3XX_RBBM_PERFCTR_RB_0_HI 0xE3
+#define A3XX_RBBM_PERFCTR_RB_1_LO 0xE4
+#define A3XX_RBBM_PERFCTR_RB_1_HI 0xE5
+
+#define A3XX_RBBM_RBBM_CTL 0x100
+#define A3XX_RBBM_PERFCTR_PWR_0_LO 0x0EA
+#define A3XX_RBBM_PERFCTR_PWR_0_HI 0x0EB
+#define A3XX_RBBM_PERFCTR_PWR_1_LO 0x0EC
+#define A3XX_RBBM_PERFCTR_PWR_1_HI 0x0ED
+#define A3XX_RBBM_DEBUG_BUS_CTL 0x111
+#define A3XX_RBBM_DEBUG_BUS_DATA_STATUS 0x112
+#define A3XX_RBBM_DEBUG_BUS_STB_CTL0 0x11B
+#define A3XX_RBBM_DEBUG_BUS_STB_CTL1 0x11C
+#define A3XX_RBBM_INT_TRACE_BUS_CTL 0x11D
+#define A3XX_RBBM_EXT_TRACE_BUS_CTL 0x11E
+#define A3XX_RBBM_EXT_TRACE_STOP_CNT 0x11F
+#define A3XX_RBBM_EXT_TRACE_START_CNT 0x120
+#define A3XX_RBBM_EXT_TRACE_PERIOD_CNT 0x121
+#define A3XX_RBBM_EXT_TRACE_CMD 0x122
+#define A3XX_CP_RB_BASE 0x01C0
+#define A3XX_CP_RB_CNTL 0x01C1
+#define A3XX_CP_RB_RPTR 0x01C4
+#define A3XX_CP_RB_WPTR 0x01C5
+/* The following two are the same as on A2XX, just in a different place */
+#define A3XX_CP_PFP_UCODE_ADDR 0x1C9
+#define A3XX_CP_PFP_UCODE_DATA 0x1CA
+#define A3XX_CP_ROQ_ADDR 0x1CC
+#define A3XX_CP_ROQ_DATA 0x1CD
+#define A3XX_CP_MERCIU_ADDR 0x1D1
+#define A3XX_CP_MERCIU_DATA 0x1D2
+#define A3XX_CP_MERCIU_DATA2 0x1D3
+#define A3XX_CP_QUEUE_THRESHOLDS 0x01D5
+#define A3XX_CP_MEQ_ADDR 0x1DA
+#define A3XX_CP_MEQ_DATA 0x1DB
+#define A3XX_CP_STATE_DEBUG_INDEX 0x01EC
+#define A3XX_CP_STATE_DEBUG_DATA 0x01ED
+#define A3XX_CP_CNTL 0x01F4
+#define A3XX_CP_WFI_PEND_CTR 0x01F5
+#define A3XX_CP_ME_CNTL 0x01F6
+#define A3XX_CP_ME_STATUS 0x01F7
+#define A3XX_CP_ME_RAM_WADDR 0x01F8
+#define A3XX_CP_ME_RAM_RADDR 0x01F9
+#define A3XX_CP_ME_RAM_DATA 0x01FA
+#define A3XX_CP_DEBUG 0x01FC
+
+#define A3XX_RBBM_PM_OVERRIDE2 0x039D
+
+#define A3XX_CP_PERFCOUNTER_SELECT 0x445
+#define A3XX_CP_IB1_BASE 0x0458
+#define A3XX_CP_IB1_BUFSZ 0x0459
+#define A3XX_CP_IB2_BASE 0x045A
+#define A3XX_CP_IB2_BUFSZ 0x045B
+
+#define A3XX_CP_HW_FAULT  0x45C
+#define A3XX_CP_PROTECT_CTRL 0x45E
+#define A3XX_CP_PROTECT_STATUS 0x45F
+#define A3XX_CP_PROTECT_REG_0 0x460
+#define A3XX_CP_STAT 0x047F
+#define A3XX_CP_SCRATCH_REG0 0x578
+#define A3XX_CP_SCRATCH_REG6 0x57E
+#define A3XX_CP_SCRATCH_REG7 0x57F
+#define A3XX_VSC_SIZE_ADDRESS 0xC02
+#define A3XX_VSC_PIPE_DATA_ADDRESS_0 0xC07
+#define A3XX_VSC_PIPE_DATA_LENGTH_0 0xC08
+#define A3XX_VSC_PIPE_DATA_ADDRESS_1 0xC0A
+#define A3XX_VSC_PIPE_DATA_LENGTH_1 0xC0B
+#define A3XX_VSC_PIPE_DATA_ADDRESS_2 0xC0D
+#define A3XX_VSC_PIPE_DATA_LENGTH_2 0xC0E
+#define A3XX_VSC_PIPE_DATA_ADDRESS_3 0xC10
+#define A3XX_VSC_PIPE_DATA_LENGTH_3 0xC11
+#define A3XX_VSC_PIPE_DATA_ADDRESS_4 0xC13
+#define A3XX_VSC_PIPE_DATA_LENGTH_4 0xC14
+#define A3XX_VSC_PIPE_DATA_ADDRESS_5 0xC16
+#define A3XX_VSC_PIPE_DATA_LENGTH_5 0xC17
+#define A3XX_VSC_PIPE_DATA_ADDRESS_6 0xC19
+#define A3XX_VSC_PIPE_DATA_LENGTH_6 0xC1A
+#define A3XX_VSC_PIPE_DATA_ADDRESS_7 0xC1C
+#define A3XX_VSC_PIPE_DATA_LENGTH_7 0xC1D
+#define A3XX_PC_PERFCOUNTER0_SELECT 0xC48
+#define A3XX_PC_PERFCOUNTER1_SELECT 0xC49
+#define A3XX_PC_PERFCOUNTER2_SELECT 0xC4A
+#define A3XX_PC_PERFCOUNTER3_SELECT 0xC4B
+#define A3XX_GRAS_TSE_DEBUG_ECO 0xC81
+#define A3XX_GRAS_PERFCOUNTER0_SELECT 0xC88
+#define A3XX_GRAS_PERFCOUNTER1_SELECT 0xC89
+#define A3XX_GRAS_PERFCOUNTER2_SELECT 0xC8A
+#define A3XX_GRAS_PERFCOUNTER3_SELECT 0xC8B
+#define A3XX_GRAS_CL_USER_PLANE_X0 0xCA0
+#define A3XX_GRAS_CL_USER_PLANE_Y0 0xCA1
+#define A3XX_GRAS_CL_USER_PLANE_Z0 0xCA2
+#define A3XX_GRAS_CL_USER_PLANE_W0 0xCA3
+#define A3XX_GRAS_CL_USER_PLANE_X1 0xCA4
+#define A3XX_GRAS_CL_USER_PLANE_Y1 0xCA5
+#define A3XX_GRAS_CL_USER_PLANE_Z1 0xCA6
+#define A3XX_GRAS_CL_USER_PLANE_W1 0xCA7
+#define A3XX_GRAS_CL_USER_PLANE_X2 0xCA8
+#define A3XX_GRAS_CL_USER_PLANE_Y2 0xCA9
+#define A3XX_GRAS_CL_USER_PLANE_Z2 0xCAA
+#define A3XX_GRAS_CL_USER_PLANE_W2 0xCAB
+#define A3XX_GRAS_CL_USER_PLANE_X3 0xCAC
+#define A3XX_GRAS_CL_USER_PLANE_Y3 0xCAD
+#define A3XX_GRAS_CL_USER_PLANE_Z3 0xCAE
+#define A3XX_GRAS_CL_USER_PLANE_W3 0xCAF
+#define A3XX_GRAS_CL_USER_PLANE_X4 0xCB0
+#define A3XX_GRAS_CL_USER_PLANE_Y4 0xCB1
+#define A3XX_GRAS_CL_USER_PLANE_Z4 0xCB2
+#define A3XX_GRAS_CL_USER_PLANE_W4 0xCB3
+#define A3XX_GRAS_CL_USER_PLANE_X5 0xCB4
+#define A3XX_GRAS_CL_USER_PLANE_Y5 0xCB5
+#define A3XX_GRAS_CL_USER_PLANE_Z5 0xCB6
+#define A3XX_GRAS_CL_USER_PLANE_W5 0xCB7
+#define A3XX_RB_GMEM_BASE_ADDR 0xCC0
+#define A3XX_RB_DEBUG_ECO_CONTROLS_ADDR 0xCC1
+#define A3XX_RB_PERFCOUNTER0_SELECT   0xCC6
+#define A3XX_RB_PERFCOUNTER1_SELECT   0xCC7
+#define A3XX_RB_FRAME_BUFFER_DIMENSION 0xCE0
+#define A3XX_SQ_GPR_MANAGEMENT 0x0D00
+#define A3XX_SQ_INST_STORE_MANAGEMENT 0x0D02
+#define A3XX_HLSQ_PERFCOUNTER0_SELECT 0xE00
+#define A3XX_HLSQ_PERFCOUNTER1_SELECT 0xE01
+#define A3XX_HLSQ_PERFCOUNTER2_SELECT 0xE02
+#define A3XX_HLSQ_PERFCOUNTER3_SELECT 0xE03
+#define A3XX_HLSQ_PERFCOUNTER4_SELECT 0xE04
+#define A3XX_HLSQ_PERFCOUNTER5_SELECT 0xE05
+#define A3XX_TP0_CHICKEN 0x0E1E
+#define A3XX_VFD_PERFCOUNTER0_SELECT 0xE44
+#define A3XX_VFD_PERFCOUNTER1_SELECT 0xE45
+#define A3XX_VPC_VPC_DEBUG_RAM_SEL 0xE61
+#define A3XX_VPC_VPC_DEBUG_RAM_READ 0xE62
+#define A3XX_VPC_PERFCOUNTER0_SELECT 0xE64
+#define A3XX_VPC_PERFCOUNTER1_SELECT 0xE65
+#define A3XX_UCHE_CACHE_MODE_CONTROL_REG 0xE82
+#define A3XX_UCHE_PERFCOUNTER0_SELECT 0xE84
+#define A3XX_UCHE_PERFCOUNTER1_SELECT 0xE85
+#define A3XX_UCHE_PERFCOUNTER2_SELECT 0xE86
+#define A3XX_UCHE_PERFCOUNTER3_SELECT 0xE87
+#define A3XX_UCHE_PERFCOUNTER4_SELECT 0xE88
+#define A3XX_UCHE_PERFCOUNTER5_SELECT 0xE89
+#define A3XX_UCHE_CACHE_INVALIDATE0_REG 0xEA0
+#define A3XX_UCHE_CACHE_INVALIDATE1_REG 0xEA1
+#define A3XX_UCHE_CACHE_WAYS_VFD 0xEA6
+#define A3XX_SP_PERFCOUNTER0_SELECT 0xEC4
+#define A3XX_SP_PERFCOUNTER1_SELECT 0xEC5
+#define A3XX_SP_PERFCOUNTER2_SELECT 0xEC6
+#define A3XX_SP_PERFCOUNTER3_SELECT 0xEC7
+#define A3XX_SP_PERFCOUNTER4_SELECT 0xEC8
+#define A3XX_SP_PERFCOUNTER5_SELECT 0xEC9
+#define A3XX_SP_PERFCOUNTER6_SELECT 0xECA
+#define A3XX_SP_PERFCOUNTER7_SELECT 0xECB
+#define A3XX_TP_PERFCOUNTER0_SELECT 0xF04
+#define A3XX_TP_PERFCOUNTER1_SELECT 0xF05
+#define A3XX_TP_PERFCOUNTER2_SELECT 0xF06
+#define A3XX_TP_PERFCOUNTER3_SELECT 0xF07
+#define A3XX_TP_PERFCOUNTER4_SELECT 0xF08
+#define A3XX_TP_PERFCOUNTER5_SELECT 0xF09
+#define A3XX_GRAS_CL_CLIP_CNTL 0x2040
+#define A3XX_GRAS_CL_GB_CLIP_ADJ 0x2044
+#define A3XX_GRAS_CL_VPORT_XOFFSET 0x2048
+#define A3XX_GRAS_CL_VPORT_XSCALE 0x2049
+#define A3XX_GRAS_CL_VPORT_YOFFSET 0x204A
+#define A3XX_GRAS_CL_VPORT_YSCALE 0x204B
+#define A3XX_GRAS_CL_VPORT_ZOFFSET 0x204C
+#define A3XX_GRAS_CL_VPORT_ZSCALE 0x204D
+#define A3XX_GRAS_SU_POINT_MINMAX 0x2068
+#define A3XX_GRAS_SU_POINT_SIZE 0x2069
+#define A3XX_GRAS_SU_POLY_OFFSET_SCALE 0x206C
+#define A3XX_GRAS_SU_POLY_OFFSET_OFFSET 0x206D
+#define A3XX_GRAS_SU_MODE_CONTROL 0x2070
+#define A3XX_GRAS_SC_CONTROL 0x2072
+#define A3XX_GRAS_SC_SCREEN_SCISSOR_TL 0x2074
+#define A3XX_GRAS_SC_SCREEN_SCISSOR_BR 0x2075
+#define A3XX_GRAS_SC_WINDOW_SCISSOR_TL 0x2079
+#define A3XX_GRAS_SC_WINDOW_SCISSOR_BR 0x207A
+#define A3XX_RB_MODE_CONTROL 0x20C0
+#define A3XX_RB_RENDER_CONTROL 0x20C1
+#define A3XX_RB_MSAA_CONTROL 0x20C2
+#define A3XX_RB_ALPHA_REFERENCE 0x20C3
+#define A3XX_RB_MRT_CONTROL0 0x20C4
+#define A3XX_RB_MRT_BUF_INFO0 0x20C5
+#define A3XX_RB_MRT_BUF_BASE0 0x20C6
+#define A3XX_RB_MRT_BLEND_CONTROL0 0x20C7
+#define A3XX_RB_MRT_CONTROL1 0x20C8
+#define A3XX_RB_MRT_BUF_INFO1 0x20C9
+#define A3XX_RB_MRT_BUF_BASE1 0x20CA
+#define A3XX_RB_MRT_BLEND_CONTROL1 0x20CB
+#define A3XX_RB_MRT_CONTROL2 0x20CC
+#define A3XX_RB_MRT_BUF_INFO2 0x20CD
+#define A3XX_RB_MRT_BUF_BASE2 0x20CE
+#define A3XX_RB_MRT_BLEND_CONTROL2 0x20CF
+#define A3XX_RB_MRT_CONTROL3 0x20D0
+#define A3XX_RB_MRT_BUF_INFO3 0x20D1
+#define A3XX_RB_MRT_BUF_BASE3 0x20D2
+#define A3XX_RB_MRT_BLEND_CONTROL3 0x20D3
+#define A3XX_RB_BLEND_RED 0x20E4
+#define A3XX_RB_BLEND_GREEN 0x20E5
+#define A3XX_RB_BLEND_BLUE 0x20E6
+#define A3XX_RB_BLEND_ALPHA 0x20E7
+#define A3XX_RB_CLEAR_COLOR_DW0 0x20E8
+#define A3XX_RB_CLEAR_COLOR_DW1 0x20E9
+#define A3XX_RB_CLEAR_COLOR_DW2 0x20EA
+#define A3XX_RB_CLEAR_COLOR_DW3 0x20EB
+#define A3XX_RB_COPY_CONTROL 0x20EC
+#define A3XX_RB_COPY_DEST_BASE 0x20ED
+#define A3XX_RB_COPY_DEST_PITCH 0x20EE
+#define A3XX_RB_COPY_DEST_INFO 0x20EF
+#define A3XX_RB_DEPTH_CONTROL 0x2100
+#define A3XX_RB_DEPTH_CLEAR 0x2101
+#define A3XX_RB_DEPTH_BUF_INFO 0x2102
+#define A3XX_RB_DEPTH_BUF_PITCH 0x2103
+#define A3XX_RB_STENCIL_CONTROL 0x2104
+#define A3XX_RB_STENCIL_CLEAR 0x2105
+#define A3XX_RB_STENCIL_BUF_INFO 0x2106
+#define A3XX_RB_STENCIL_BUF_PITCH 0x2107
+#define A3XX_RB_STENCIL_REF_MASK 0x2108
+#define A3XX_RB_STENCIL_REF_MASK_BF 0x2109
+#define A3XX_RB_LRZ_VSC_CONTROL 0x210C
+#define A3XX_RB_WINDOW_OFFSET 0x210E
+#define A3XX_RB_SAMPLE_COUNT_CONTROL 0x2110
+#define A3XX_RB_SAMPLE_COUNT_ADDR 0x2111
+#define A3XX_RB_Z_CLAMP_MIN 0x2114
+#define A3XX_RB_Z_CLAMP_MAX 0x2115
+#define A3XX_HLSQ_CONTROL_0_REG 0x2200
+#define A3XX_HLSQ_CONTROL_1_REG 0x2201
+#define A3XX_HLSQ_CONTROL_2_REG 0x2202
+#define A3XX_HLSQ_CONTROL_3_REG 0x2203
+#define A3XX_HLSQ_VS_CONTROL_REG 0x2204
+#define A3XX_HLSQ_FS_CONTROL_REG 0x2205
+#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG 0x2206
+#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG 0x2207
+#define A3XX_HLSQ_CL_NDRANGE_0_REG 0x220A
+#define A3XX_HLSQ_CL_NDRANGE_1_REG 0x220B
+#define A3XX_HLSQ_CL_NDRANGE_2_REG 0x220C
+#define A3XX_HLSQ_CL_NDRANGE_3_REG 0x220D
+#define A3XX_HLSQ_CL_NDRANGE_4_REG 0x220E
+#define A3XX_HLSQ_CL_NDRANGE_5_REG 0x220F
+#define A3XX_HLSQ_CL_NDRANGE_6_REG 0x2210
+#define A3XX_HLSQ_CL_CONTROL_0_REG 0x2211
+#define A3XX_HLSQ_CL_CONTROL_1_REG 0x2212
+#define A3XX_HLSQ_CL_KERNEL_CONST_REG 0x2214
+#define A3XX_HLSQ_CL_KERNEL_GROUP_X_REG 0x2215
+#define A3XX_HLSQ_CL_KERNEL_GROUP_Y_REG 0x2216
+#define A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG 0x2217
+#define A3XX_HLSQ_CL_WG_OFFSET_REG 0x221A
+#define A3XX_VFD_FETCH_INSTR_1_0 0x2247
+#define A3XX_VFD_FETCH_INSTR_1_1 0x2249
+#define A3XX_VFD_FETCH_INSTR_1_2 0x224B
+#define A3XX_VFD_FETCH_INSTR_1_3 0x224D
+#define A3XX_VFD_FETCH_INSTR_1_4 0x224F
+#define A3XX_VFD_FETCH_INSTR_1_5 0x2251
+#define A3XX_VFD_FETCH_INSTR_1_6 0x2253
+#define A3XX_VFD_FETCH_INSTR_1_7 0x2255
+#define A3XX_VFD_FETCH_INSTR_1_8 0x2257
+#define A3XX_VFD_FETCH_INSTR_1_9 0x2259
+#define A3XX_VFD_FETCH_INSTR_1_A 0x225B
+#define A3XX_VFD_FETCH_INSTR_1_B 0x225D
+#define A3XX_VFD_FETCH_INSTR_1_C 0x225F
+#define A3XX_VFD_FETCH_INSTR_1_D 0x2261
+#define A3XX_VFD_FETCH_INSTR_1_E 0x2263
+#define A3XX_VFD_FETCH_INSTR_1_F 0x2265
+#define A3XX_SP_SP_CTRL_REG 0x22C0
+#define A3XX_SP_VS_CTRL_REG0 0x22C4
+#define A3XX_SP_VS_CTRL_REG1 0x22C5
+#define A3XX_SP_VS_PARAM_REG 0x22C6
+#define A3XX_SP_VS_OUT_REG_0 0x22C7
+#define A3XX_SP_VS_OUT_REG_1 0x22C8
+#define A3XX_SP_VS_OUT_REG_2 0x22C9
+#define A3XX_SP_VS_OUT_REG_3 0x22CA
+#define A3XX_SP_VS_OUT_REG_4 0x22CB
+#define A3XX_SP_VS_OUT_REG_5 0x22CC
+#define A3XX_SP_VS_OUT_REG_6 0x22CD
+#define A3XX_SP_VS_OUT_REG_7 0x22CE
+#define A3XX_SP_VS_VPC_DST_REG_0 0x22D0
+#define A3XX_SP_VS_VPC_DST_REG_1 0x22D1
+#define A3XX_SP_VS_VPC_DST_REG_2 0x22D2
+#define A3XX_SP_VS_VPC_DST_REG_3 0x22D3
+#define A3XX_SP_VS_OBJ_OFFSET_REG 0x22D4
+#define A3XX_SP_VS_OBJ_START_REG 0x22D5
+#define A3XX_SP_VS_PVT_MEM_PARAM_REG 0x22D6
+#define A3XX_SP_VS_PVT_MEM_ADDR_REG 0x22D7
+#define A3XX_SP_VS_PVT_MEM_SIZE_REG 0x22D8
+#define A3XX_SP_VS_LENGTH_REG 0x22DF
+#define A3XX_SP_FS_CTRL_REG0 0x22E0
+#define A3XX_SP_FS_CTRL_REG1 0x22E1
+#define A3XX_SP_FS_OBJ_OFFSET_REG 0x22E2
+#define A3XX_SP_FS_OBJ_START_REG 0x22E3
+#define A3XX_SP_FS_PVT_MEM_PARAM_REG 0x22E4
+#define A3XX_SP_FS_PVT_MEM_ADDR_REG 0x22E5
+#define A3XX_SP_FS_PVT_MEM_SIZE_REG 0x22E6
+#define A3XX_SP_FS_FLAT_SHAD_MODE_REG_0 0x22E8
+#define A3XX_SP_FS_FLAT_SHAD_MODE_REG_1 0x22E9
+#define A3XX_SP_FS_OUTPUT_REG 0x22EC
+#define A3XX_SP_FS_MRT_REG_0 0x22F0
+#define A3XX_SP_FS_MRT_REG_1 0x22F1
+#define A3XX_SP_FS_MRT_REG_2 0x22F2
+#define A3XX_SP_FS_MRT_REG_3 0x22F3
+#define A3XX_SP_FS_IMAGE_OUTPUT_REG_0 0x22F4
+#define A3XX_SP_FS_IMAGE_OUTPUT_REG_1 0x22F5
+#define A3XX_SP_FS_IMAGE_OUTPUT_REG_2 0x22F6
+#define A3XX_SP_FS_IMAGE_OUTPUT_REG_3 0x22F7
+#define A3XX_SP_FS_LENGTH_REG 0x22FF
+#define A3XX_PA_SC_AA_CONFIG 0x2301
+#define A3XX_VBIF_CLKON 0x3001
+#define A3XX_VBIF_ABIT_SORT 0x301C
+#define A3XX_VBIF_ABIT_SORT_CONF 0x301D
+#define A3XX_VBIF_GATE_OFF_WRREQ_EN 0x302A
+#define A3XX_VBIF_IN_RD_LIM_CONF0 0x302C
+#define A3XX_VBIF_IN_RD_LIM_CONF1 0x302D
+#define A3XX_VBIF_IN_WR_LIM_CONF0 0x3030
+#define A3XX_VBIF_IN_WR_LIM_CONF1 0x3031
+#define A3XX_VBIF_OUT_RD_LIM_CONF0 0x3034
+#define A3XX_VBIF_OUT_WR_LIM_CONF0 0x3035
+#define A3XX_VBIF_DDR_OUT_MAX_BURST 0x3036
+#define A3XX_VBIF_ARB_CTL 0x303C
+#define A3XX_VBIF_ROUND_ROBIN_QOS_ARB 0x3049
+#define A3XX_VBIF_OUT_AXI_AOOO_EN 0x305E
+#define A3XX_VBIF_OUT_AXI_AOOO 0x305F
+#define A3XX_VBIF_PERF_CNT0_LO 0x3073
+#define A3XX_VBIF_PERF_CNT0_HI 0x3074
+#define A3XX_VBIF_PERF_CNT1_LO 0x3075
+#define A3XX_VBIF_PERF_CNT1_HI 0x3076
+#define A3XX_VBIF_PERF_PWR_CNT0_LO 0x3077
+#define A3XX_VBIF_PERF_PWR_CNT0_HI 0x3078
+#define A3XX_VBIF_PERF_PWR_CNT1_LO 0x3079
+#define A3XX_VBIF_PERF_PWR_CNT1_HI 0x307a
+#define A3XX_VBIF_PERF_PWR_CNT2_LO 0x307b
+#define A3XX_VBIF_PERF_PWR_CNT2_HI 0x307c
+
+#define A3XX_VBIF_XIN_HALT_CTRL0 0x3080
+#define A3XX_VBIF_XIN_HALT_CTRL0_MASK 0x3F
+#define A30X_VBIF_XIN_HALT_CTRL0_MASK 0x7
+
+#define A3XX_VBIF_XIN_HALT_CTRL1 0x3081
+
+/* VBIF register offsets for A306 */
+#define A3XX_VBIF2_PERF_CNT_SEL0 0x30d0
+#define A3XX_VBIF2_PERF_CNT_SEL1 0x30d1
+#define A3XX_VBIF2_PERF_CNT_SEL2 0x30d2
+#define A3XX_VBIF2_PERF_CNT_SEL3 0x30d3
+#define A3XX_VBIF2_PERF_CNT_LOW0 0x30d8
+#define A3XX_VBIF2_PERF_CNT_LOW1 0x30d9
+#define A3XX_VBIF2_PERF_CNT_LOW2 0x30da
+#define A3XX_VBIF2_PERF_CNT_LOW3 0x30db
+#define A3XX_VBIF2_PERF_CNT_HIGH0 0x30e0
+#define A3XX_VBIF2_PERF_CNT_HIGH1 0x30e1
+#define A3XX_VBIF2_PERF_CNT_HIGH2 0x30e2
+#define A3XX_VBIF2_PERF_CNT_HIGH3 0x30e3
+
+#define A3XX_VBIF2_PERF_PWR_CNT_EN0 0x3100
+#define A3XX_VBIF2_PERF_PWR_CNT_EN1 0x3101
+#define A3XX_VBIF2_PERF_PWR_CNT_EN2 0x3102
+#define A3XX_VBIF2_PERF_PWR_CNT_LOW0 0x3110
+#define A3XX_VBIF2_PERF_PWR_CNT_LOW1 0x3111
+#define A3XX_VBIF2_PERF_PWR_CNT_LOW2 0x3112
+#define A3XX_VBIF2_PERF_PWR_CNT_HIGH0 0x3118
+#define A3XX_VBIF2_PERF_PWR_CNT_HIGH1 0x3119
+#define A3XX_VBIF2_PERF_PWR_CNT_HIGH2 0x311a
+
+#define A3XX_VBIF_DDR_OUTPUT_RECOVERABLE_HALT_CTRL0 0x3800
+#define A3XX_VBIF_DDR_OUTPUT_RECOVERABLE_HALT_CTRL1 0x3801
+
+/* RBBM Debug bus block IDs */
+#define RBBM_BLOCK_ID_CP               0x1
+#define RBBM_BLOCK_ID_RBBM             0x2
+#define RBBM_BLOCK_ID_VBIF             0x3
+#define RBBM_BLOCK_ID_HLSQ             0x4
+#define RBBM_BLOCK_ID_UCHE             0x5
+#define RBBM_BLOCK_ID_PC               0x8
+#define RBBM_BLOCK_ID_VFD              0x9
+#define RBBM_BLOCK_ID_VPC              0xa
+#define RBBM_BLOCK_ID_TSE              0xb
+#define RBBM_BLOCK_ID_RAS              0xc
+#define RBBM_BLOCK_ID_VSC              0xd
+#define RBBM_BLOCK_ID_SP_0             0x10
+#define RBBM_BLOCK_ID_SP_1             0x11
+#define RBBM_BLOCK_ID_SP_2             0x12
+#define RBBM_BLOCK_ID_SP_3             0x13
+#define RBBM_BLOCK_ID_TPL1_0           0x18
+#define RBBM_BLOCK_ID_TPL1_1           0x19
+#define RBBM_BLOCK_ID_TPL1_2           0x1a
+#define RBBM_BLOCK_ID_TPL1_3           0x1b
+#define RBBM_BLOCK_ID_RB_0             0x20
+#define RBBM_BLOCK_ID_RB_1             0x21
+#define RBBM_BLOCK_ID_RB_2             0x22
+#define RBBM_BLOCK_ID_RB_3             0x23
+#define RBBM_BLOCK_ID_MARB_0           0x28
+#define RBBM_BLOCK_ID_MARB_1           0x29
+#define RBBM_BLOCK_ID_MARB_2           0x2a
+#define RBBM_BLOCK_ID_MARB_3           0x2b
+
+/* RBBM_CLOCK_CTL default value */
+#define A3XX_RBBM_CLOCK_CTL_DEFAULT   0xAAAAAAAA
+#define A320_RBBM_CLOCK_CTL_DEFAULT   0xBFFFFFFF
+#define A330_RBBM_CLOCK_CTL_DEFAULT   0xBFFCFFFF
+
+#define A330_RBBM_GPR0_CTL_DEFAULT    0x00000000
+#define A330v2_RBBM_GPR0_CTL_DEFAULT  0x05515455
+#define A310_RBBM_GPR0_CTL_DEFAULT    0x000000AA
+
+/* COUNTABLE FOR SP PERFCOUNTER */
+#define SP_ALU_ACTIVE_CYCLES           0x1D
+#define SP0_ICL1_MISSES                0x1A
+#define SP_FS_CFLOW_INSTRUCTIONS       0x0C
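+
+/*
+ * Illustrative sketch (not part of this snapshot): a countable such as
+ * SP_ALU_ACTIVE_CYCLES is typically programmed into one of the
+ * A3XX_SP_PERFCOUNTERn_SELECT registers above and the 64-bit result is
+ * read back from the matching A3XX_RBBM_PERFCTR_SP_n_LO/_HI pair, e.g.
+ * (assuming the driver's kgsl_regwrite() helper):
+ *
+ *	kgsl_regwrite(device, A3XX_SP_PERFCOUNTER0_SELECT,
+ *			SP_ALU_ACTIVE_CYCLES);
+ */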
+
+/* COUNTABLE FOR TSE PERFCOUNTER */
+#define TSE_INPUT_PRIM_NUM             0x0
+
+/* VBIF countables */
+#define VBIF_AXI_TOTAL_BEATS 85
+
+/* VBIF Recoverable HALT bit value */
+#define VBIF_RECOVERABLE_HALT_CTRL 0x1
+
+/*
+ * CP DEBUG settings for A3XX core:
+ * DYNAMIC_CLK_DISABLE [27] - turn off the dynamic clock control
+ * MIU_128BIT_WRITE_ENABLE [25] - Allow 128 bit writes to the VBIF
+ */
+#define A3XX_CP_DEBUG_DEFAULT ((1 << 27) | (1 << 25))
+
+
+#endif
diff --git a/drivers/gpu/msm/a4xx_reg.h b/drivers/gpu/msm/a4xx_reg.h
new file mode 100644
index 0000000..8e658c1
--- /dev/null
+++ b/drivers/gpu/msm/a4xx_reg.h
@@ -0,0 +1,847 @@
+/* Copyright (c) 2013-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _A4XX_REG_H
+#define _A4XX_REG_H
+
+/* A4XX interrupt bits */
+#define A4XX_INT_RBBM_GPU_IDLE			0
+#define A4XX_INT_RBBM_AHB_ERROR			1
+#define A4XX_INT_RBBM_REG_TIMEOUT		2
+#define A4XX_INT_RBBM_ME_MS_TIMEOUT		3
+#define A4XX_INT_RBBM_PFP_MS_TIMEOUT		4
+#define A4XX_INT_RBBM_ETS_MS_TIMEOUT		5
+#define A4XX_INT_RBBM_ASYNC_OVERFLOW		6
+#define A4XX_INT_RBBM_GPC_ERR			7
+#define A4XX_INT_CP_SW				8
+#define A4XX_INT_CP_OPCODE_ERROR		9
+#define A4XX_INT_CP_RESERVED_BIT_ERROR		10
+#define A4XX_INT_CP_HW_FAULT			11
+#define A4XX_INT_CP_DMA				12
+#define A4XX_INT_CP_IB2_INT			13
+#define A4XX_INT_CP_IB1_INT			14
+#define A4XX_INT_CP_RB_INT			15
+#define A4XX_INT_CP_REG_PROTECT_FAULT		16
+#define A4XX_INT_CP_RB_DONE_TS			17
+#define A4XX_INT_CP_VS_DONE_TS			18
+#define A4XX_INT_CP_PS_DONE_TS			19
+#define A4XX_INT_CACHE_FLUSH_TS			20
+#define A4XX_INT_CP_AHB_ERROR_HALT		21
+#define A4XX_INT_RBBM_ATB_BUS_OVERFLOW		22
+#define A4XX_INT_MISC_HANG_DETECT		24
+#define A4XX_INT_UCHE_OOB_ACCESS		25
+#define A4XX_INT_RBBM_DPM_CALC_ERR		28
+#define A4XX_INT_RBBM_DPM_EPOCH_ERR		29
+#define A4XX_INT_RBBM_DPM_THERMAL_YELLOW_ERR	30
+#define A4XX_INT_RBBM_DPM_THERMAL_RED_ERR	31
+
+/* RB registers */
+#define A4XX_RB_GMEM_BASE_ADDR		0xcc0
+
+#define A4XX_RB_PERFCTR_RB_SEL_0	0xcc7
+#define A4XX_RB_PERFCTR_RB_SEL_1	0xcc8
+#define A4XX_RB_PERFCTR_RB_SEL_2	0xcc9
+#define A4XX_RB_PERFCTR_RB_SEL_3	0xcca
+#define A4XX_RB_PERFCTR_RB_SEL_4	0xccb
+#define A4XX_RB_PERFCTR_RB_SEL_5	0xccc
+#define A4XX_RB_PERFCTR_RB_SEL_6	0xccd
+#define A4XX_RB_PERFCTR_RB_SEL_7	0xcce
+
+enum a4xx_rb_perfctr_rb_sel {
+	RB_VALID_SAMPLES = 0x25,
+	RB_Z_FAIL = 0x28,
+	RB_S_FAIL = 0x29,
+};
+
+/* RBBM registers */
+#define A4XX_RBBM_CLOCK_CTL_TP0			0x4
+#define A4XX_RBBM_CLOCK_CTL_TP1			0x5
+#define A4XX_RBBM_CLOCK_CTL_TP2			0x6
+#define A4XX_RBBM_CLOCK_CTL_TP3			0x7
+#define A4XX_RBBM_CLOCK_CTL2_TP0		0x8
+#define A4XX_RBBM_CLOCK_CTL2_TP1		0x9
+#define A4XX_RBBM_CLOCK_CTL2_TP2		0xA
+#define A4XX_RBBM_CLOCK_CTL2_TP3		0xB
+#define A4XX_RBBM_CLOCK_HYST_TP0		0xC
+#define A4XX_RBBM_CLOCK_HYST_TP1		0xD
+#define A4XX_RBBM_CLOCK_HYST_TP2		0xE
+#define A4XX_RBBM_CLOCK_HYST_TP3		0xF
+#define A4XX_RBBM_CLOCK_DELAY_TP0		0x10
+#define A4XX_RBBM_CLOCK_DELAY_TP1		0x11
+#define A4XX_RBBM_CLOCK_DELAY_TP2		0x12
+#define A4XX_RBBM_CLOCK_DELAY_TP3		0x13
+#define A4XX_RBBM_CLOCK_CTL_UCHE		0x14
+#define A4XX_RBBM_CLOCK_CTL2_UCHE		0x15
+#define A4XX_RBBM_CLOCK_CTL3_UCHE		0x16
+#define A4XX_RBBM_CLOCK_CTL4_UCHE		0x17
+#define A4XX_RBBM_CLOCK_HYST_UCHE		0x18
+#define A4XX_RBBM_CLOCK_DELAY_UCHE		0x19
+#define A4XX_RBBM_CLOCK_MODE_GPC		0x1a
+#define A4XX_RBBM_CLOCK_DELAY_GPC		0x1b
+#define A4XX_RBBM_CLOCK_HYST_GPC		0x1c
+#define A4XX_RBBM_CLOCK_CTL_TSE_RAS_RBBM	0x1d
+#define A4XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM	0x1e
+#define A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM	0x1f
+#define A4XX_RBBM_CLOCK_CTL			0x20
+#define A4XX_RBBM_SP_HYST_CNT			0x21
+#define A4XX_RBBM_SW_RESET_CMD			0x22
+#define A4XX_RBBM_AHB_CTL0			0x23
+#define A4XX_RBBM_AHB_CTL1			0x24
+#define A4XX_RBBM_AHB_CMD			0x25
+#define A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL		0x2b
+#define A4XX_RBBM_INTERFACE_HANG_INT_CTL	0x2f
+#define A4XX_RBBM_INT_CLEAR_CMD			0x36
+#define A4XX_RBBM_INT_0_MASK			0x37
+#define A4XX_RBBM_RBBM_CTL			0x3e
+#define A4XX_RBBM_CLOCK_CTL2			0x42
+#define A4XX_RBBM_BLOCK_SW_RESET_CMD		0x45
+#define A4XX_RBBM_EXT_TRACE_BUS_CTL		0x49
+#define A4XX_RBBM_CFG_DEBBUS_SEL_A		0x4a
+#define A4XX_RBBM_CFG_DEBBUS_SEL_B		0x4b
+#define A4XX_RBBM_CFG_DEBBUS_SEL_C		0x4c
+#define A4XX_RBBM_CFG_DEBBUS_SEL_D		0x4d
+#define A4XX_RBBM_CFG_DEBBUS_SEL_PING_INDEX_SHIFT	0
+#define A4XX_RBBM_CFG_DEBBUS_SEL_PING_BLK_SEL_SHIFT	8
+
+#define A4XX_RBBM_CFG_DEBBUS_CTLT		0x4e
+
+#define A4XX_RBBM_CFG_DEBBUS_CTLM		0x4f
+#define A4XX_RBBM_CFG_DEBBUS_CTLT_ENABLE_SHIFT		24
+
+#define A4XX_RBBM_CFG_DEBBUS_OPL		0x50
+#define A4XX_RBBM_CFG_DEBBUS_OPE		0x51
+#define A4XX_RBBM_CFG_DEBBUS_IVTL_0		0x52
+#define A4XX_RBBM_CFG_DEBBUS_IVTL_1		0x53
+#define A4XX_RBBM_CFG_DEBBUS_IVTL_2		0x54
+#define A4XX_RBBM_CFG_DEBBUS_IVTL_3		0x55
+
+#define A4XX_RBBM_CFG_DEBBUS_MASKL_0		0x56
+#define A4XX_RBBM_CFG_DEBBUS_MASKL_1		0x57
+#define A4XX_RBBM_CFG_DEBBUS_MASKL_2		0x58
+#define A4XX_RBBM_CFG_DEBBUS_MASKL_3		0x59
+
+
+#define A4XX_RBBM_CFG_DEBBUS_BYTEL_0		0x5a
+#define A4XX_RBBM_CFG_DEBBUS_BYTEL_1		0x5b
+
+#define A4XX_RBBM_CFG_DEBBUS_IVTE_0		0x5c
+#define A4XX_RBBM_CFG_DEBBUS_IVTE_1		0x5d
+#define A4XX_RBBM_CFG_DEBBUS_IVTE_2		0x5e
+#define A4XX_RBBM_CFG_DEBBUS_IVTE_3		0x5f
+#define A4XX_RBBM_CFG_DEBBUS_MASKE_0		0x60
+#define A4XX_RBBM_CFG_DEBBUS_MASKE_1		0x61
+#define A4XX_RBBM_CFG_DEBBUS_MASKE_2		0x62
+#define A4XX_RBBM_CFG_DEBBUS_MASKE_3		0x63
+#define A4XX_RBBM_CFG_DEBBUS_NIBBLEE		0x64
+#define A4XX_RBBM_CFG_DEBBUS_PTRC0		0x65
+#define A4XX_RBBM_CFG_DEBBUS_PTRC1		0x66
+#define A4XX_RBBM_CFG_DEBBUS_LOADREG		0x67
+#define A4XX_RBBM_CLOCK_CTL_SP0			0x68
+#define A4XX_RBBM_CLOCK_CTL_SP1			0x69
+#define A4XX_RBBM_CLOCK_CTL_SP2			0x6A
+#define A4XX_RBBM_CLOCK_CTL_SP3			0x6B
+#define A4XX_RBBM_CLOCK_CTL2_SP0		0x6C
+#define A4XX_RBBM_CLOCK_CTL2_SP1		0x6D
+#define A4XX_RBBM_CLOCK_CTL2_SP2		0x6E
+#define A4XX_RBBM_CLOCK_CTL2_SP3		0x6F
+#define A4XX_RBBM_CLOCK_HYST_SP0		0x70
+#define A4XX_RBBM_CLOCK_HYST_SP1		0x71
+#define A4XX_RBBM_CLOCK_HYST_SP2		0x72
+#define A4XX_RBBM_CLOCK_HYST_SP3		0x73
+#define A4XX_RBBM_CLOCK_DELAY_SP0		0x74
+#define A4XX_RBBM_CLOCK_DELAY_SP1		0x75
+#define A4XX_RBBM_CLOCK_DELAY_SP2		0x76
+#define A4XX_RBBM_CLOCK_DELAY_SP3		0x77
+#define A4XX_RBBM_CLOCK_CTL_RB0			0x78
+#define A4XX_RBBM_CLOCK_CTL_RB1			0x79
+#define A4XX_RBBM_CLOCK_CTL_RB2			0x7A
+#define A4XX_RBBM_CLOCK_CTL_RB3			0x7B
+#define A4XX_RBBM_CLOCK_CTL2_RB0		0x7C
+#define A4XX_RBBM_CLOCK_CTL2_RB1		0x7D
+#define A4XX_RBBM_CLOCK_CTL2_RB2		0x7E
+#define A4XX_RBBM_CLOCK_CTL2_RB3		0x7F
+#define A4XX_RBBM_CLOCK_HYST_COM_DCOM		0x80
+#define A4XX_RBBM_CLOCK_CTL_COM_DCOM		0x81
+#define A4XX_RBBM_CLOCK_CTL_MARB_CCU0		0x82
+#define A4XX_RBBM_CLOCK_CTL_MARB_CCU1		0x83
+#define A4XX_RBBM_CLOCK_CTL_MARB_CCU2		0x84
+#define A4XX_RBBM_CLOCK_CTL_MARB_CCU3		0x85
+#define A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU0	0x86
+#define A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU1	0x87
+#define A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU2	0x88
+#define A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU3	0x89
+#define A4XX_RBBM_CLOCK_CTL_HLSQ		0x8a
+#define A4XX_RBBM_CLOCK_HYST_HLSQ		0x8b
+
+#define A4XX_RBBM_CLOCK_DELAY_HLSQ		0x8c
+#define A4XX_CGC_HLSQ_TP_EARLY_CYC_MASK		0x00700000
+#define A4XX_CGC_HLSQ_TP_EARLY_CYC_SHIFT	20
+
+#define A4XX_RBBM_CLOCK_DELAY_COM_DCOM		0x8d
+#define A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_0	0x8e
+#define A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_1	0x8f
+#define A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_2	0x90
+#define A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_3	0x91
+
+#define A4XX_RBBM_CFG_DEBBUS_IDX		0x93
+#define A4XX_RBBM_CFG_DEBBUS_CLRC		0x94
+#define A4XX_RBBM_CFG_DEBBUS_LOADIVT		0x95
+
+#define A4XX_RBBM_CLOCK_CTL_IP			0x97
+#define A4XX_RBBM_POWER_CNTL_IP			0x98
+#define A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0	0x99
+#define A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1	0x9a
+#define A4XX_RBBM_PERFCTR_CP_0_LO		0x9c
+#define A4XX_RBBM_PERFCTR_CP_0_HI		0x9d
+#define A4XX_RBBM_PERFCTR_CP_1_LO		0x9e
+#define A4XX_RBBM_PERFCTR_CP_1_HI		0x9f
+#define A4XX_RBBM_PERFCTR_CP_2_LO		0xa0
+#define A4XX_RBBM_PERFCTR_CP_2_HI		0xa1
+#define A4XX_RBBM_PERFCTR_CP_3_LO		0xa2
+#define A4XX_RBBM_PERFCTR_CP_3_HI		0xa3
+#define A4XX_RBBM_PERFCTR_CP_4_LO		0xa4
+#define A4XX_RBBM_PERFCTR_CP_4_HI		0xa5
+#define A4XX_RBBM_PERFCTR_CP_5_LO		0xa6
+#define A4XX_RBBM_PERFCTR_CP_5_HI		0xa7
+#define A4XX_RBBM_PERFCTR_CP_6_LO		0xa8
+#define A4XX_RBBM_PERFCTR_CP_6_HI		0xa9
+#define A4XX_RBBM_PERFCTR_CP_7_LO		0xaa
+#define A4XX_RBBM_PERFCTR_CP_7_HI		0xab
+#define A4XX_RBBM_PERFCTR_RBBM_0_LO		0xac
+#define A4XX_RBBM_PERFCTR_RBBM_0_HI		0xad
+#define A4XX_RBBM_PERFCTR_RBBM_1_LO		0xae
+#define A4XX_RBBM_PERFCTR_RBBM_1_HI		0xaf
+#define A4XX_RBBM_PERFCTR_RBBM_2_LO		0xb0
+#define A4XX_RBBM_PERFCTR_RBBM_2_HI		0xb1
+#define A4XX_RBBM_PERFCTR_RBBM_3_LO		0xb2
+#define A4XX_RBBM_PERFCTR_RBBM_3_HI		0xb3
+#define A4XX_RBBM_PERFCTR_PC_0_LO		0xb4
+#define A4XX_RBBM_PERFCTR_PC_0_HI		0xb5
+#define A4XX_RBBM_PERFCTR_PC_1_LO		0xb6
+#define A4XX_RBBM_PERFCTR_PC_1_HI		0xb7
+#define A4XX_RBBM_PERFCTR_PC_2_LO		0xb8
+#define A4XX_RBBM_PERFCTR_PC_2_HI		0xb9
+#define A4XX_RBBM_PERFCTR_PC_3_LO		0xba
+#define A4XX_RBBM_PERFCTR_PC_3_HI		0xbb
+#define A4XX_RBBM_PERFCTR_PC_4_LO		0xbc
+#define A4XX_RBBM_PERFCTR_PC_4_HI		0xbd
+#define A4XX_RBBM_PERFCTR_PC_5_LO		0xbe
+#define A4XX_RBBM_PERFCTR_PC_5_HI		0xbf
+#define A4XX_RBBM_PERFCTR_PC_6_LO		0xc0
+#define A4XX_RBBM_PERFCTR_PC_6_HI		0xc1
+#define A4XX_RBBM_PERFCTR_PC_7_LO		0xc2
+#define A4XX_RBBM_PERFCTR_PC_7_HI		0xc3
+#define A4XX_RBBM_PERFCTR_VFD_0_LO		0xc4
+#define A4XX_RBBM_PERFCTR_VFD_0_HI		0xc5
+#define A4XX_RBBM_PERFCTR_VFD_1_LO		0xc6
+#define A4XX_RBBM_PERFCTR_VFD_1_HI		0xc7
+#define A4XX_RBBM_PERFCTR_VFD_2_LO		0xc8
+#define A4XX_RBBM_PERFCTR_VFD_2_HI		0xc9
+#define A4XX_RBBM_PERFCTR_VFD_3_LO		0xca
+#define A4XX_RBBM_PERFCTR_VFD_3_HI		0xcb
+#define A4XX_RBBM_PERFCTR_VFD_4_LO		0xcc
+#define A4XX_RBBM_PERFCTR_VFD_4_HI		0xcd
+#define A4XX_RBBM_PERFCTR_VFD_5_LO		0xce
+#define A4XX_RBBM_PERFCTR_VFD_5_HI		0xcf
+#define A4XX_RBBM_PERFCTR_VFD_6_LO		0xd0
+#define A4XX_RBBM_PERFCTR_VFD_6_HI		0xd1
+#define A4XX_RBBM_PERFCTR_VFD_7_LO		0xd2
+#define A4XX_RBBM_PERFCTR_VFD_7_HI		0xd3
+#define A4XX_RBBM_PERFCTR_HLSQ_0_LO		0xd4
+#define A4XX_RBBM_PERFCTR_HLSQ_0_HI		0xd5
+#define A4XX_RBBM_PERFCTR_HLSQ_1_LO		0xd6
+#define A4XX_RBBM_PERFCTR_HLSQ_1_HI		0xd7
+#define A4XX_RBBM_PERFCTR_HLSQ_2_LO		0xd8
+#define A4XX_RBBM_PERFCTR_HLSQ_2_HI		0xd9
+#define A4XX_RBBM_PERFCTR_HLSQ_3_LO		0xda
+#define A4XX_RBBM_PERFCTR_HLSQ_3_HI		0xdb
+#define A4XX_RBBM_PERFCTR_HLSQ_4_LO		0xdc
+#define A4XX_RBBM_PERFCTR_HLSQ_4_HI		0xdd
+#define A4XX_RBBM_PERFCTR_HLSQ_5_LO		0xde
+#define A4XX_RBBM_PERFCTR_HLSQ_5_HI		0xdf
+#define A4XX_RBBM_PERFCTR_HLSQ_6_LO		0xe0
+#define A4XX_RBBM_PERFCTR_HLSQ_6_HI		0xe1
+#define A4XX_RBBM_PERFCTR_HLSQ_7_LO		0xe2
+#define A4XX_RBBM_PERFCTR_HLSQ_7_HI		0xe3
+#define A4XX_RBBM_PERFCTR_VPC_0_LO		0xe4
+#define A4XX_RBBM_PERFCTR_VPC_0_HI		0xe5
+#define A4XX_RBBM_PERFCTR_VPC_1_LO		0xe6
+#define A4XX_RBBM_PERFCTR_VPC_1_HI		0xe7
+#define A4XX_RBBM_PERFCTR_VPC_2_LO		0xe8
+#define A4XX_RBBM_PERFCTR_VPC_2_HI		0xe9
+#define A4XX_RBBM_PERFCTR_VPC_3_LO		0xea
+#define A4XX_RBBM_PERFCTR_VPC_3_HI		0xeb
+#define A4XX_RBBM_PERFCTR_CCU_0_LO		0xec
+#define A4XX_RBBM_PERFCTR_CCU_0_HI		0xed
+#define A4XX_RBBM_PERFCTR_CCU_1_LO		0xee
+#define A4XX_RBBM_PERFCTR_CCU_1_HI		0xef
+#define A4XX_RBBM_PERFCTR_CCU_2_LO		0xf0
+#define A4XX_RBBM_PERFCTR_CCU_2_HI		0xf1
+#define A4XX_RBBM_PERFCTR_CCU_3_LO		0xf2
+#define A4XX_RBBM_PERFCTR_CCU_3_HI		0xf3
+#define A4XX_RBBM_PERFCTR_TSE_0_LO		0xf4
+#define A4XX_RBBM_PERFCTR_TSE_0_HI		0xf5
+#define A4XX_RBBM_PERFCTR_TSE_1_LO		0xf6
+#define A4XX_RBBM_PERFCTR_TSE_1_HI		0xf7
+#define A4XX_RBBM_PERFCTR_TSE_2_LO		0xf8
+#define A4XX_RBBM_PERFCTR_TSE_2_HI		0xf9
+#define A4XX_RBBM_PERFCTR_TSE_3_LO		0xfa
+#define A4XX_RBBM_PERFCTR_TSE_3_HI		0xfb
+#define A4XX_RBBM_PERFCTR_RAS_0_LO		0xfc
+#define A4XX_RBBM_PERFCTR_RAS_0_HI		0xfd
+#define A4XX_RBBM_PERFCTR_RAS_1_LO		0xfe
+#define A4XX_RBBM_PERFCTR_RAS_1_HI		0xff
+#define A4XX_RBBM_PERFCTR_RAS_2_LO		0x100
+#define A4XX_RBBM_PERFCTR_RAS_2_HI		0x101
+#define A4XX_RBBM_PERFCTR_RAS_3_LO		0x102
+#define A4XX_RBBM_PERFCTR_RAS_3_HI		0x103
+#define A4XX_RBBM_PERFCTR_UCHE_0_LO		0x104
+#define A4XX_RBBM_PERFCTR_UCHE_0_HI		0x105
+#define A4XX_RBBM_PERFCTR_UCHE_1_LO		0x106
+#define A4XX_RBBM_PERFCTR_UCHE_1_HI		0x107
+#define A4XX_RBBM_PERFCTR_UCHE_2_LO		0x108
+#define A4XX_RBBM_PERFCTR_UCHE_2_HI		0x109
+#define A4XX_RBBM_PERFCTR_UCHE_3_LO		0x10a
+#define A4XX_RBBM_PERFCTR_UCHE_3_HI		0x10b
+#define A4XX_RBBM_PERFCTR_UCHE_4_LO		0x10c
+#define A4XX_RBBM_PERFCTR_UCHE_4_HI		0x10d
+#define A4XX_RBBM_PERFCTR_UCHE_5_LO		0x10e
+#define A4XX_RBBM_PERFCTR_UCHE_5_HI		0x10f
+#define A4XX_RBBM_PERFCTR_UCHE_6_LO		0x110
+#define A4XX_RBBM_PERFCTR_UCHE_6_HI		0x111
+#define A4XX_RBBM_PERFCTR_UCHE_7_LO		0x112
+#define A4XX_RBBM_PERFCTR_UCHE_7_HI		0x113
+#define A4XX_RBBM_PERFCTR_TP_0_LO		0x114
+#define A4XX_RBBM_PERFCTR_TP_0_HI		0x115
+#define A4XX_RBBM_PERFCTR_TP_1_LO		0x116
+#define A4XX_RBBM_PERFCTR_TP_1_HI		0x117
+#define A4XX_RBBM_PERFCTR_TP_2_LO		0x118
+#define A4XX_RBBM_PERFCTR_TP_2_HI		0x119
+#define A4XX_RBBM_PERFCTR_TP_3_LO		0x11a
+#define A4XX_RBBM_PERFCTR_TP_3_HI		0x11b
+#define A4XX_RBBM_PERFCTR_TP_4_LO		0x11c
+#define A4XX_RBBM_PERFCTR_TP_4_HI		0x11d
+#define A4XX_RBBM_PERFCTR_TP_5_LO		0x11e
+#define A4XX_RBBM_PERFCTR_TP_5_HI		0x11f
+#define A4XX_RBBM_PERFCTR_TP_6_LO		0x120
+#define A4XX_RBBM_PERFCTR_TP_6_HI		0x121
+#define A4XX_RBBM_PERFCTR_TP_7_LO		0x122
+#define A4XX_RBBM_PERFCTR_TP_7_HI		0x123
+#define A4XX_RBBM_PERFCTR_SP_0_LO		0x124
+#define A4XX_RBBM_PERFCTR_SP_0_HI		0x125
+#define A4XX_RBBM_PERFCTR_SP_1_LO		0x126
+#define A4XX_RBBM_PERFCTR_SP_1_HI		0x127
+#define A4XX_RBBM_PERFCTR_SP_2_LO		0x128
+#define A4XX_RBBM_PERFCTR_SP_2_HI		0x129
+#define A4XX_RBBM_PERFCTR_SP_3_LO		0x12a
+#define A4XX_RBBM_PERFCTR_SP_3_HI		0x12b
+#define A4XX_RBBM_PERFCTR_SP_4_LO		0x12c
+#define A4XX_RBBM_PERFCTR_SP_4_HI		0x12d
+#define A4XX_RBBM_PERFCTR_SP_5_LO		0x12e
+#define A4XX_RBBM_PERFCTR_SP_5_HI		0x12f
+#define A4XX_RBBM_PERFCTR_SP_6_LO		0x130
+#define A4XX_RBBM_PERFCTR_SP_6_HI		0x131
+#define A4XX_RBBM_PERFCTR_SP_7_LO		0x132
+#define A4XX_RBBM_PERFCTR_SP_7_HI		0x133
+#define A4XX_RBBM_PERFCTR_SP_8_LO		0x134
+#define A4XX_RBBM_PERFCTR_SP_8_HI		0x135
+#define A4XX_RBBM_PERFCTR_SP_9_LO		0x136
+#define A4XX_RBBM_PERFCTR_SP_9_HI		0x137
+#define A4XX_RBBM_PERFCTR_SP_10_LO		0x138
+#define A4XX_RBBM_PERFCTR_SP_10_HI		0x139
+#define A4XX_RBBM_PERFCTR_SP_11_LO		0x13a
+#define A4XX_RBBM_PERFCTR_SP_11_HI		0x13b
+#define A4XX_RBBM_PERFCTR_RB_0_LO		0x13c
+#define A4XX_RBBM_PERFCTR_RB_0_HI		0x13d
+#define A4XX_RBBM_PERFCTR_RB_1_LO		0x13e
+#define A4XX_RBBM_PERFCTR_RB_1_HI		0x13f
+#define A4XX_RBBM_PERFCTR_RB_2_LO		0x140
+#define A4XX_RBBM_PERFCTR_RB_2_HI		0x141
+#define A4XX_RBBM_PERFCTR_RB_3_LO		0x142
+#define A4XX_RBBM_PERFCTR_RB_3_HI		0x143
+#define A4XX_RBBM_PERFCTR_RB_4_LO		0x144
+#define A4XX_RBBM_PERFCTR_RB_4_HI		0x145
+#define A4XX_RBBM_PERFCTR_RB_5_LO		0x146
+#define A4XX_RBBM_PERFCTR_RB_5_HI		0x147
+#define A4XX_RBBM_PERFCTR_RB_6_LO		0x148
+#define A4XX_RBBM_PERFCTR_RB_6_HI		0x149
+#define A4XX_RBBM_PERFCTR_RB_7_LO		0x14a
+#define A4XX_RBBM_PERFCTR_RB_7_HI		0x14b
+#define A4XX_RBBM_PERFCTR_VSC_0_LO		0x14c
+#define A4XX_RBBM_PERFCTR_VSC_0_HI		0x14d
+#define A4XX_RBBM_PERFCTR_VSC_1_LO		0x14e
+#define A4XX_RBBM_PERFCTR_VSC_1_HI		0x14f
+#define A4XX_RBBM_PERFCTR_PWR_0_LO		0x166
+#define A4XX_RBBM_PERFCTR_PWR_0_HI		0x167
+#define A4XX_RBBM_PERFCTR_PWR_1_LO		0x168
+#define A4XX_RBBM_PERFCTR_PWR_1_HI		0x169
+#define A4XX_RBBM_ALWAYSON_COUNTER_LO		0x16e
+#define A4XX_RBBM_ALWAYSON_COUNTER_HI		0x16f
+#define A4XX_RBBM_PERFCTR_CTL			0x170
+#define A4XX_RBBM_PERFCTR_LOAD_CMD0		0x171
+#define A4XX_RBBM_PERFCTR_LOAD_CMD1		0x172
+#define A4XX_RBBM_PERFCTR_LOAD_CMD2		0x173
+#define A4XX_RBBM_PERFCTR_LOAD_VALUE_LO		0x174
+#define A4XX_RBBM_PERFCTR_LOAD_VALUE_HI		0x175
+#define A4XX_RBBM_PERFCTR_RBBM_SEL_0		0x176
+#define A4XX_RBBM_PERFCTR_RBBM_SEL_1		0x177
+#define A4XX_RBBM_PERFCTR_RBBM_SEL_2		0x178
+#define A4XX_RBBM_PERFCTR_RBBM_SEL_3		0x179
+#define A4XX_RBBM_GPU_BUSY_MASKED		0x17a
+#define A4XX_RBBM_INT_0_STATUS			0x17d
+#define A4XX_RBBM_AHB_ME_SPLIT_STATUS		0x18c
+#define A4XX_RBBM_AHB_PFP_SPLIT_STATUS		0x18d
+#define A4XX_RBBM_AHB_ERROR_STATUS		0x18f
+#define A4XX_RBBM_STATUS			0x191
+#define A4XX_RBBM_CFG_DEBBUS_TRACE_BUF4		0x1ad
+#define A4XX_RBBM_POWER_STATUS			0x1b0
+#define A4XX_RBBM_PPD_V2_SP_PWR_WEIGHTS		0x1b2
+#define A4XX_RBBM_PPD_V2_SP_RB_EPOCH_TH		0x1b3
+#define A4XX_RBBM_PPD_V2_TP_CONFIG		0x1b4
+#define A4XX_RBBM_PPD_RAMP_V2_CONTROL		0x1b5
+#define A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2		0x1b8
+#define A4XX_RBBM_PPD_CTRL			0x1b9
+#define A4XX_RBBM_PPD_EPOCH_INTER_TH_HIGH_CLEAR_THR  0x1bc
+#define A4XX_RBBM_PPD_EPOCH_INTER_TH_LOW	0x1bd
+/* SECVID registers */
+#define A4XX_RBBM_SECVID_TRUST_CONFIG		0xf000
+#define A4XX_RBBM_SECVID_TRUST_CONTROL		0xf400
+#define A4XX_RBBM_SECVID_TSB_TRUSTED_BASE	0xf800
+#define A4XX_RBBM_SECVID_TSB_TRUSTED_SIZE	0xf801
+#define A4XX_RBBM_SECVID_TSB_CONTROL		0xf802
+
+/* CP registers */
+#define A4XX_CP_RB_BASE			0x200
+#define A4XX_CP_RB_CNTL			0x201
+#define A4XX_CP_RB_RPTR_ADDR		0x203
+#define A4XX_CP_RB_RPTR			0x204
+#define A4XX_CP_RB_WPTR			0x205
+#define A4XX_CP_IB1_BASE		0x206
+#define A4XX_CP_IB1_BUFSZ		0x207
+#define A4XX_CP_IB2_BASE		0x208
+#define A4XX_CP_IB2_BUFSZ		0x209
+#define A4XX_CP_ROQ_ADDR		0x21C
+#define A4XX_CP_ROQ_DATA		0x21D
+#define A4XX_CP_MEQ_ADDR		0x21E
+#define A4XX_CP_MEQ_DATA		0x21F
+#define A4XX_CP_MERCIU_ADDR		0x220
+#define A4XX_CP_MERCIU_DATA		0x221
+#define A4XX_CP_MERCIU_DATA2		0x222
+#define A4XX_CP_PFP_UCODE_ADDR		0x223
+#define A4XX_CP_PFP_UCODE_DATA		0x224
+#define A4XX_CP_ME_RAM_WADDR		0x225
+#define A4XX_CP_ME_RAM_RADDR		0x226
+#define A4XX_CP_ME_RAM_DATA		0x227
+
+#define A4XX_CP_PREEMPT			0x22a
+
+#define A4XX_CP_PREEMPT_DISABLE		0x22b
+#define A4XX_CP_CNTL			0x22c
+#define A4XX_CP_ME_CNTL			0x22d
+#define A4XX_CP_DEBUG			0x22e
+#define A4XX_CP_STATE_DEBUG_INDEX	0x22f
+#define A4XX_CP_STATE_DEBUG_DATA	0x230
+#define A4XX_CP_POWER_COLLAPSE_CNTL	0x234
+/*
+ * CP DEBUG settings for A4XX cores:
+ * MIU_128BIT_WRITE_ENABLE [25] - Allow 128 bit writes to the VBIF
+ */
+#define A4XX_CP_DEBUG_DEFAULT (1 << 25)
+
+#define A4XX_CP_PROTECT_REG_0		0x240
+#define A4XX_CP_PROTECT_CTRL		0x250
+#define A4XX_CP_PROTECT_REG_10          0x251
+
+#define A4XX_CP_ME_STATUS		0x4d1
+#define A4XX_CP_CNTL			0x22c
+#define A4XX_CP_WFI_PEND_CTR		0x4d2
+#define A4XX_CP_PREEMPT_DEBUG		0x4d6
+#define A4XX_CP_HW_FAULT		0x4d8
+#define A4XX_CP_PROTECT_STATUS		0x4da
+#define A4XX_CP_PERFCTR_CP_SEL_0	0x500
+#define A4XX_CP_PERFCTR_CP_SEL_1	0x501
+#define A4XX_CP_PERFCTR_CP_SEL_2	0x502
+#define A4XX_CP_PERFCTR_CP_SEL_3	0x503
+#define A4XX_CP_PERFCTR_CP_SEL_4	0x504
+#define A4XX_CP_PERFCTR_CP_SEL_5	0x505
+#define A4XX_CP_PERFCTR_CP_SEL_6	0x506
+#define A4XX_CP_PERFCTR_CP_SEL_7	0x507
+
+#define A4XX_CP_SCRATCH_REG6		0x57e
+#define A4XX_CP_SCRATCH_REG7		0x57f
+#define A4XX_CP_SCRATCH_REG8		0x580
+#define A4XX_CP_SCRATCH_REG9		0x581
+#define A4XX_CP_SCRATCH_REG10		0x582
+#define A4XX_CP_SCRATCH_REG11		0x583
+#define A4XX_CP_SCRATCH_REG12		0x584
+#define A4XX_CP_SCRATCH_REG13		0x585
+#define A4XX_CP_SCRATCH_REG14		0x586
+#define A4XX_CP_SCRATCH_REG15		0x587
+#define A4XX_CP_SCRATCH_REG16		0x588
+#define A4XX_CP_SCRATCH_REG17		0x589
+#define A4XX_CP_SCRATCH_REG18		0x58a
+#define A4XX_CP_SCRATCH_REG23		0x58f
+
+/* SP registers */
+#define A4XX_SP_SP_CTRL			0x22C0
+#define A4XX_SP_INSTR_CACHE_CTRL	0x22c1
+#define A4XX_SP_VS_OBJ_START		0x22e1
+#define A4XX_SP_VS_PVT_MEM_ADDR		0x22e3
+#define A4XX_SP_FS_CTRL_1		0x22e9
+#define A4XX_SP_FS_OBJ_START		0x22eb
+#define A4XX_SP_FS_PVT_MEM_ADDR		0x22ed
+#define A4XX_SP_CS_CTRL_0		0x2300
+#define A4XX_SP_CS_OBJ_OFFSET		0x2301
+#define A4XX_SP_CS_OBJ_START		0x2302
+#define A4XX_SP_CS_LENGTH		0x2306
+#define A4XX_SP_MODE_CONTROL		0xec3
+#define A4XX_SP_PERFCTR_SP_SEL_0	0xec4
+#define A4XX_SP_PERFCTR_SP_SEL_1	0xec5
+#define A4XX_SP_PERFCTR_SP_SEL_2	0xec6
+#define A4XX_SP_PERFCTR_SP_SEL_3	0xec7
+#define A4XX_SP_PERFCTR_SP_SEL_4	0xec8
+#define A4XX_SP_PERFCTR_SP_SEL_5	0xec9
+#define A4XX_SP_PERFCTR_SP_SEL_6	0xeca
+#define A4XX_SP_PERFCTR_SP_SEL_7	0xecb
+#define A4XX_SP_PERFCTR_SP_SEL_8	0xecc
+#define A4XX_SP_PERFCTR_SP_SEL_9	0xecd
+#define A4XX_SP_PERFCTR_SP_SEL_10	0xece
+#define A4XX_SP_PERFCTR_SP_SEL_11	0xecf
+#define A4XX_SP_VS_PVT_MEM_ADDR		0x22e3
+#define A4XX_SP_FS_PVT_MEM_ADDR		0x22ed
+#define A4XX_SP_VS_OBJ_START		0x22e1
+#define A4XX_SP_FS_OBJ_START		0x22eb
+
+/* COUNTABLE FOR SP PERFCOUNTER */
+#define A4XX_SP_ALU_ACTIVE_CYCLES	0x1D
+#define A4XX_SP0_ICL1_MISSES		0x1A
+#define A4XX_SP_FS_CFLOW_INSTRUCTIONS	0x0C
+
+/* COUNTABLE FOR TSE PERFCOUNTER */
+#define A4XX_TSE_INPUT_PRIM_NUM		0x0
+
+enum a4xx_sp_perfctr_sp_sel {
+	SP_FS_STAGE_BARY_INSTRUCTIONS = 0x10,
+};
+
+/* VPC registers */
+#define A4XX_VPC_DEBUG_RAM_SEL		0xe60
+#define A4XX_VPC_DEBUG_RAM_READ		0xe61
+#define A4XX_VPC_PERFCTR_VPC_SEL_0	0xe65
+#define A4XX_VPC_PERFCTR_VPC_SEL_1	0xe66
+#define A4XX_VPC_PERFCTR_VPC_SEL_2	0xe67
+#define A4XX_VPC_PERFCTR_VPC_SEL_3	0xe68
+
+/* UCHE registers */
+#define UCHE_TRAP_BASE_LO               0xe83
+#define UCHE_TRAP_BASE_HI               0xe84
+#define A4XX_UCHE_INVALIDATE0		0xe8a
+#define A4XX_UCHE_INVALIDATE1		0xe8b
+#define A4XX_UCHE_CACHE_WAYS_VFD	0xe8c
+
+/* VSC registers */
+#define A4XX_VSC_SIZE_ADDRESS		0xc01
+#define A4XX_VSC_PIPE_DATA_ADDRESS_0	0xc10
+#define A4XX_VSC_PIPE_DATA_ADDRESS_1	0xc11
+#define A4XX_VSC_PIPE_DATA_ADDRESS_2	0xc12
+#define A4XX_VSC_PIPE_DATA_ADDRESS_3	0xc13
+#define A4XX_VSC_PIPE_DATA_ADDRESS_4	0xc14
+#define A4XX_VSC_PIPE_DATA_ADDRESS_5	0xc15
+#define A4XX_VSC_PIPE_DATA_ADDRESS_6	0xc16
+#define A4XX_VSC_PIPE_DATA_ADDRESS_7	0xc17
+#define A4XX_VSC_PIPE_DATA_LENGTH_0	0xc18
+#define A4XX_VSC_PIPE_DATA_LENGTH_1	0xc19
+#define A4XX_VSC_PIPE_DATA_LENGTH_2	0xc1a
+#define A4XX_VSC_PIPE_DATA_LENGTH_3	0xc1b
+#define A4XX_VSC_PIPE_DATA_LENGTH_4	0xc1c
+#define A4XX_VSC_PIPE_DATA_LENGTH_5	0xc1d
+#define A4XX_VSC_PIPE_DATA_LENGTH_6	0xc1e
+#define A4XX_VSC_PIPE_DATA_LENGTH_7	0xc1f
+#define A4XX_VSC_PERFCTR_VSC_SEL_0	0xc50
+#define A4XX_VSC_PERFCTR_VSC_SEL_1	0xc51
+
+/* VFD registers */
+#define A4XX_VFD_FETCH_INSTR_1_31	0x2287
+#define A4XX_VFD_PERFCTR_VFD_SEL_0	0xe43
+#define A4XX_VFD_PERFCTR_VFD_SEL_1	0xe44
+#define A4XX_VFD_PERFCTR_VFD_SEL_2	0xe45
+#define A4XX_VFD_PERFCTR_VFD_SEL_3	0xe46
+#define A4XX_VFD_PERFCTR_VFD_SEL_4	0xe47
+#define A4XX_VFD_PERFCTR_VFD_SEL_5	0xe48
+#define A4XX_VFD_PERFCTR_VFD_SEL_6	0xe49
+#define A4XX_VFD_PERFCTR_VFD_SEL_7	0xe4a
+#define A4XX_VFD_FETCH_INSTR_1_0	0x220b
+#define A4XX_VFD_FETCH_INSTR_1_1	0x220f
+#define A4XX_VFD_FETCH_INSTR_1_2	0x2213
+#define A4XX_VFD_FETCH_INSTR_1_3	0x2217
+#define A4XX_VFD_FETCH_INSTR_1_4	0x221b
+#define A4XX_VFD_FETCH_INSTR_1_5	0x221f
+#define A4XX_VFD_FETCH_INSTR_1_6	0x2223
+#define A4XX_VFD_FETCH_INSTR_1_7	0x2227
+#define A4XX_VFD_FETCH_INSTR_1_8	0x222b
+#define A4XX_VFD_FETCH_INSTR_1_9	0x222f
+#define A4XX_VFD_FETCH_INSTR_1_10	0x2233
+#define A4XX_VFD_FETCH_INSTR_1_11	0x2237
+#define A4XX_VFD_FETCH_INSTR_1_12	0x223b
+#define A4XX_VFD_FETCH_INSTR_1_13	0x223f
+#define A4XX_VFD_FETCH_INSTR_1_14	0x2243
+#define A4XX_VFD_FETCH_INSTR_1_15	0x2247
+#define A4XX_VFD_FETCH_INSTR_1_16	0x224b
+#define A4XX_VFD_FETCH_INSTR_1_17	0x224f
+#define A4XX_VFD_FETCH_INSTR_1_18	0x2253
+#define A4XX_VFD_FETCH_INSTR_1_19	0x2257
+#define A4XX_VFD_FETCH_INSTR_1_20	0x225b
+#define A4XX_VFD_FETCH_INSTR_1_21	0x225f
+#define A4XX_VFD_FETCH_INSTR_1_22	0x2263
+#define A4XX_VFD_FETCH_INSTR_1_23	0x2267
+#define A4XX_VFD_FETCH_INSTR_1_24	0x226b
+#define A4XX_VFD_FETCH_INSTR_1_25	0x226f
+#define A4XX_VFD_FETCH_INSTR_1_26	0x2273
+#define A4XX_VFD_FETCH_INSTR_1_27	0x2277
+#define A4XX_VFD_FETCH_INSTR_1_28	0x227b
+#define A4XX_VFD_FETCH_INSTR_1_29	0x227f
+#define A4XX_VFD_FETCH_INSTR_1_30	0x2283
+#define A4XX_VFD_FETCH_INSTR_1_31	0x2287
+
+
+enum a4xx_vfd_perfctr_vfd_sel {
+	VFD_VPC_BYPASS_TRANS = 0x2,
+	VFD_UPPER_SHADER_FIBERS = 0xb,
+	VFD_LOWER_SHADER_FIBERS = 0xc,
+};
+
+/* VBIF registers */
+#define A4XX_VBIF_VERSION			0x3000
+#define A4XX_VBIF_CLKON				0x3001
+#define A4XX_VBIF_CLKON_FORCE_ON_TESTBUS_MASK	0x1
+#define A4XX_VBIF_CLKON_FORCE_ON_TESTBUS_SHIFT	0x1
+
+#define A4XX_VBIF_ABIT_SORT		0x301c
+#define A4XX_VBIF_ABIT_SORT_CONF	0x301d
+#define A4XX_VBIF_GATE_OFF_WRREQ_EN	0x302a
+#define A4XX_VBIF_IN_RD_LIM_CONF0	0x302c
+#define A4XX_VBIF_IN_RD_LIM_CONF1	0x302d
+#define A4XX_VBIF_IN_WR_LIM_CONF0	0x3030
+#define A4XX_VBIF_IN_WR_LIM_CONF1	0x3031
+#define A4XX_VBIF_ROUND_ROBIN_QOS_ARB	0x3049
+
+#define A4XX_VBIF_XIN_HALT_CTRL0	0x3080
+#define A4XX_VBIF_XIN_HALT_CTRL0_MASK	0x1F
+#define A405_VBIF_XIN_HALT_CTRL0_MASK	0x3
+
+#define A4XX_VBIF_XIN_HALT_CTRL1	0x3081
+
+#define A4XX_VBIF_TEST_BUS_OUT_CTRL		0x3084
+#define A4XX_VBIF_TEST_BUS_OUT_CTRL_EN_MASK	0x1
+#define A4XX_VBIF_TEST_BUS_OUT_CTRL_EN_SHIFT	0x0
+
+#define A4XX_VBIF_TEST_BUS1_CTRL0	0x3085
+#define A4XX_VBIF_TEST_BUS1_CTRL1			0x3086
+#define A4XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_MASK		0xF
+#define A4XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_SHIFT	0
+
+#define A4XX_VBIF_TEST_BUS2_CTRL0	0x3087
+#define A4XX_VBIF_TEST_BUS2_CTRL1			0x3088
+#define A4XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_MASK		0xF
+#define A4XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_SHIFT	0x0
+
+#define A4XX_VBIF_TEST_BUS_OUT		0x308c
+
+#define A4XX_VBIF_PERF_CNT_SEL0		0x30d0
+#define A4XX_VBIF_PERF_CNT_SEL1		0x30d1
+#define A4XX_VBIF_PERF_CNT_SEL2		0x30d2
+#define A4XX_VBIF_PERF_CNT_SEL3		0x30d3
+#define A4XX_VBIF_PERF_CNT_LOW0		0x30d8
+#define A4XX_VBIF_PERF_CNT_LOW1		0x30d9
+#define A4XX_VBIF_PERF_CNT_LOW2		0x30da
+#define A4XX_VBIF_PERF_CNT_LOW3		0x30db
+#define A4XX_VBIF_PERF_CNT_HIGH0	0x30e0
+#define A4XX_VBIF_PERF_CNT_HIGH1	0x30e1
+#define A4XX_VBIF_PERF_CNT_HIGH2	0x30e2
+#define A4XX_VBIF_PERF_CNT_HIGH3	0x30e3
+
+#define A4XX_VBIF_PERF_PWR_CNT_EN0	0x3100
+#define A4XX_VBIF_PERF_PWR_CNT_EN1	0x3101
+#define A4XX_VBIF_PERF_PWR_CNT_EN2	0x3102
+#define A4XX_VBIF_PERF_PWR_CNT_EN3	0x3103
+#define A4XX_VBIF_PERF_PWR_CNT_LOW0	0x3110
+#define A4XX_VBIF_PERF_PWR_CNT_LOW1	0x3111
+#define A4XX_VBIF_PERF_PWR_CNT_LOW2	0x3112
+#define A4XX_VBIF_PERF_PWR_CNT_LOW3	0x3113
+#define A4XX_VBIF_PERF_PWR_CNT_HIGH0	0x3118
+#define A4XX_VBIF_PERF_PWR_CNT_HIGH1	0x3119
+#define A4XX_VBIF_PERF_PWR_CNT_HIGH2	0x311a
+#define A4XX_VBIF_PERF_PWR_CNT_HIGH3	0x311b
+
+/* GRAS registers */
+#define A4XX_GRAS_PERFCTR_TSE_SEL_0	0xc88
+#define A4XX_GRAS_PERFCTR_TSE_SEL_1	0xc89
+#define A4XX_GRAS_PERFCTR_TSE_SEL_2	0xc8a
+#define A4XX_GRAS_PERFCTR_TSE_SEL_3	0xc8b
+#define A4XX_GRAS_PERFCTR_RAS_SEL_0	0xc8c
+#define A4XX_GRAS_PERFCTR_RAS_SEL_1	0xc8d
+#define A4XX_GRAS_PERFCTR_RAS_SEL_2	0xc8e
+#define A4XX_GRAS_PERFCTR_RAS_SEL_3	0xc8f
+
+/* PC registers */
+#define A4XX_PC_PERFCTR_PC_SEL_0	0xd10
+#define A4XX_PC_PERFCTR_PC_SEL_1	0xd11
+#define A4XX_PC_PERFCTR_PC_SEL_2	0xd12
+#define A4XX_PC_PERFCTR_PC_SEL_3	0xd13
+#define A4XX_PC_PERFCTR_PC_SEL_4	0xd14
+#define A4XX_PC_PERFCTR_PC_SEL_5	0xd15
+#define A4XX_PC_PERFCTR_PC_SEL_6	0xd16
+#define A4XX_PC_PERFCTR_PC_SEL_7	0xd17
+
+enum a4xx_pc_perfctr_pc_sel {
+	PC_INSTANCES = 0x1,
+	PC_VERTEX_HITS = 0x8,
+	PC_GENERATED_FIBERS = 0x12,
+	PC_GENERATED_WAVES = 0x13,
+};
+
+/* HLSQ registers */
+#define A4XX_HLSQ_TIMEOUT_THRESHOLD     0xe00
+#define A4XX_HLSQ_MODE_CONTROL		0xe05
+#define A4XX_HLSQ_PERFCTR_HLSQ_SEL_0	0xe06
+#define A4XX_HLSQ_PERFCTR_HLSQ_SEL_1	0xe07
+#define A4XX_HLSQ_PERFCTR_HLSQ_SEL_2	0xe08
+#define A4XX_HLSQ_PERFCTR_HLSQ_SEL_3	0xe09
+#define A4XX_HLSQ_PERFCTR_HLSQ_SEL_4	0xe0a
+#define A4XX_HLSQ_PERFCTR_HLSQ_SEL_5	0xe0b
+#define A4XX_HLSQ_PERFCTR_HLSQ_SEL_6	0xe0c
+#define A4XX_HLSQ_PERFCTR_HLSQ_SEL_7	0xe0d
+#define A4XX_HLSQ_SPTP_RDSEL		0xe30
+#define A4xx_HLSQ_CONTROL_0		0x23c0
+#define A4XX_HLSQ_CS_CONTROL		0x23ca
+#define A4XX_HLSQ_CL_NDRANGE_0		0x23cd
+#define A4XX_HLSQ_CL_CONTROL_0		0x23d4
+#define A4XX_HLSQ_CL_KERNEL_CONST	0x23d6
+#define A4XX_HLSQ_CL_KERNEL_GROUP_X	0x23d7
+#define A4XX_HLSQ_CL_WG_OFFSET		0x23da
+#define A4XX_HLSQ_UPDATE_CONTROL	0x23db
+
+enum a4xx_hlsq_perfctr_hlsq_sel {
+	HLSQ_SP_VS_STAGE_CONSTANT = 0x0,
+	HLSQ_SP_VS_STAGE_INSTRUCTIONS = 0x1,
+	HLSQ_SP_FS_STAGE_CONSTANT = 0x2,
+	HLSQ_SP_FS_STAGE_INSTRUCTIONS = 0x3,
+	HLSQ_FS_STAGE_16_WAVES = 0x8,
+	HLSQ_FS_STAGE_32_WAVES = 0x9,
+	HLSQ_FS_STAGE_64_WAVES = 0xa,
+	HLSQ_VS_STAGE_16_WAVES = 0xb,
+	HLSQ_VS_STAGE_32_WAVES = 0xc,
+};
+
+/* CCU registers */
+#define A4XX_RB_PERFCTR_CCU_SEL_0	0xccf
+#define A4XX_RB_PERFCTR_CCU_SEL_1	0xcd0
+#define A4XX_RB_PERFCTR_CCU_SEL_2	0xcd1
+#define A4XX_RB_PERFCTR_CCU_SEL_3	0xcd2
+
+enum a4xx_cu_perfctr_ccu_sel {
+	CCU_VBIF_STALL = 0x1,
+	CCU_VBIF_LATENCY_CYCLES = 0x4,
+	CCU_VBIF_LATENCY_SAMPLES = 0x5,
+	CCU_Z_READ = 0x13,
+	CCU_Z_WRITE = 0x14,
+	CCU_C_READ = 0x15,
+	CCU_C_WRITE = 0x16,
+};
+
+/* UCHE registers */
+#define A4XX_UCHE_PERFCTR_UCHE_SEL_0	0xe8e
+#define A4XX_UCHE_PERFCTR_UCHE_SEL_1	0xe8f
+#define A4XX_UCHE_PERFCTR_UCHE_SEL_2	0xe90
+#define A4XX_UCHE_PERFCTR_UCHE_SEL_3	0xe91
+#define A4XX_UCHE_PERFCTR_UCHE_SEL_4	0xe92
+#define A4XX_UCHE_PERFCTR_UCHE_SEL_5	0xe93
+#define A4XX_UCHE_PERFCTR_UCHE_SEL_6	0xe94
+#define A4XX_UCHE_PERFCTR_UCHE_SEL_7	0xe95
+
+/* UCHE countables */
+enum a4xx_uche_perfctr_uche_sel {
+	UCHE_READ_REQUESTS_MARB = 0x8,
+	UCHE_READ_REQUESTS_SP = 0x9,
+	UCHE_WRITE_REQUESTS_MARB = 0xa,
+	UCHE_WRITE_REQUESTS_SP = 0xb,
+	UCHE_WRITE_REQUESTS_VPC = 0x14,
+};
+
+/* TPL1 registers */
+#define A4XX_TPL1_TP_MODE_CONTROL	0xf03
+#define A4XX_TPL1_PERFCTR_TP_SEL_0	0xf04
+#define A4XX_TPL1_PERFCTR_TP_SEL_1	0xf05
+#define A4XX_TPL1_PERFCTR_TP_SEL_2	0xf06
+#define A4XX_TPL1_PERFCTR_TP_SEL_3	0xf07
+#define A4XX_TPL1_PERFCTR_TP_SEL_4	0xf08
+#define A4XX_TPL1_PERFCTR_TP_SEL_5	0xf09
+#define A4XX_TPL1_PERFCTR_TP_SEL_6	0xf0a
+#define A4XX_TPL1_PERFCTR_TP_SEL_7	0xf0b
+#define A4XX_TPL1_TP_TEX_TSIZE_1	0x23a0
+
+enum a4xx_tpl1_perfctr_tp_sel {
+	TP_OUTPUT_TEXELS_POINT = 0x2,
+	TP_OUTPUT_TEXELS_BILINEAR = 0x3,
+	TP_OUTPUT_TEXELS_MIP = 0x4,
+	TP_OUTPUT_TEXELS_ANISO = 0x5,
+	TP_OUTPUT_TEXELS_OPS16 = 0x6,
+	TP_OUTPUT_TEXELS_OPS32 = 0x7,
+	TP_ZERO_LOD = 0xe,
+	TP_LATENCY = 0x12,
+	TP_LATENCY_TRANS = 0x13,
+};
+
+/* Enum for debug bus */
+enum a4xx_rbbm_debbus_id {
+	A4XX_RBBM_DEBBUS_CP_ID = 0x1,
+	A4XX_RBBM_DEBBUS_RBBM_ID = 0x2,
+	A4XX_RBBM_DEBBUS_VBIF_ID = 0x3,
+	A4XX_RBBM_DEBBUS_HLSQ_ID = 0x4,
+	A4XX_RBBM_DEBBUS_UCHE_ID = 0x5,
+	A4XX_RBBM_DEBBUS_DPM_ID  = 0x6,
+	A4XX_RBBM_DEBBUS_TESS_ID = 0x7,
+	A4XX_RBBM_DEBBUS_PC_ID   = 0x8,
+	A4XX_RBBM_DEBBUS_VFD_ID  = 0x9,
+	A4XX_RBBM_DEBBUS_VPC_ID  = 0xa,
+	A4XX_RBBM_DEBBUS_TSE_ID  = 0xb,
+	A4XX_RBBM_DEBBUS_RAS_ID  = 0xc,
+	A4XX_RBBM_DEBBUS_VSC_ID  = 0xd,
+	A4XX_RBBM_DEBBUS_COM_ID  = 0xe,
+	A4XX_RBBM_DEBBUS_DCOM_ID = 0xf,
+	A4XX_RBBM_DEBBUS_SP_0_ID = 0x10,
+	A4XX_RBBM_DEBBUS_SP_1_ID = 0x11,
+	A4XX_RBBM_DEBBUS_SP_2_ID = 0x12,
+	A4XX_RBBM_DEBBUS_SP_3_ID = 0x13,
+	A4XX_RBBM_DEBBUS_TPL1_0_ID = 0x18,
+	A4XX_RBBM_DEBBUS_TPL1_1_ID = 0x19,
+	A4XX_RBBM_DEBBUS_TPL1_2_ID = 0x1a,
+	A4XX_RBBM_DEBBUS_TPL1_3_ID = 0x1b,
+	A4XX_RBBM_DEBBUS_RB_0_ID = 0x20,
+	A4XX_RBBM_DEBBUS_RB_1_ID = 0x21,
+	A4XX_RBBM_DEBBUS_RB_2_ID = 0x22,
+	A4XX_RBBM_DEBBUS_RB_3_ID = 0x23,
+	A4XX_RBBM_DEBBUS_MARB_0_ID = 0x28,
+	A4XX_RBBM_DEBBUS_MARB_1_ID = 0x29,
+	A4XX_RBBM_DEBBUS_MARB_2_ID = 0x2a,
+	A4XX_RBBM_DEBBUS_MARB_3_ID = 0x2b,
+	A4XX_RBBM_DEBBUS_CCU_0_ID = 0x30,
+	A4XX_RBBM_DEBBUS_CCU_1_ID = 0x31,
+	A4XX_RBBM_DEBBUS_CCU_2_ID = 0x32,
+	A4XX_RBBM_DEBBUS_CCU_3_ID = 0x33
+};
+
+#define A4XX_NUM_AXI_ARB_BLOCKS	2
+#define A4XX_NUM_XIN_BLOCKS	5
+
+#endif /* _A4XX_REG_H */
diff --git a/drivers/gpu/msm/a5xx_reg.h b/drivers/gpu/msm/a5xx_reg.h
new file mode 100644
index 0000000..ef2861c
--- /dev/null
+++ b/drivers/gpu/msm/a5xx_reg.h
@@ -0,0 +1,910 @@
+/* Copyright (c) 2014-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _A5XX_REG_H
+#define _A5XX_REG_H
+
+/* A5XX interrupt bits */
+#define A5XX_INT_RBBM_GPU_IDLE           0
+#define A5XX_INT_RBBM_AHB_ERROR          1
+#define A5XX_INT_RBBM_TRANSFER_TIMEOUT   2
+#define A5XX_INT_RBBM_ME_MS_TIMEOUT      3
+#define A5XX_INT_RBBM_PFP_MS_TIMEOUT     4
+#define A5XX_INT_RBBM_ETS_MS_TIMEOUT     5
+#define A5XX_INT_RBBM_ATB_ASYNC_OVERFLOW 6
+#define A5XX_INT_RBBM_GPC_ERROR          7
+#define A5XX_INT_CP_SW                   8
+#define A5XX_INT_CP_HW_ERROR             9
+#define A5XX_INT_CP_CCU_FLUSH_DEPTH_TS   10
+#define A5XX_INT_CP_CCU_FLUSH_COLOR_TS   11
+#define A5XX_INT_CP_CCU_RESOLVE_TS       12
+#define A5XX_INT_CP_IB2                  13
+#define A5XX_INT_CP_IB1                  14
+#define A5XX_INT_CP_RB                   15
+#define A5XX_INT_CP_UNUSED_1             16
+#define A5XX_INT_CP_RB_DONE_TS           17
+#define A5XX_INT_CP_WT_DONE_TS           18
+#define A5XX_INT_UNKNOWN_1               19
+#define A5XX_INT_CP_CACHE_FLUSH_TS       20
+#define A5XX_INT_UNUSED_2                21
+#define A5XX_INT_RBBM_ATB_BUS_OVERFLOW   22
+#define A5XX_INT_MISC_HANG_DETECT        23
+#define A5XX_INT_UCHE_OOB_ACCESS         24
+#define A5XX_INT_UCHE_TRAP_INTR          25
+#define A5XX_INT_DEBBUS_INTR_0           26
+#define A5XX_INT_DEBBUS_INTR_1           27
+#define A5XX_INT_GPMU_VOLTAGE_DROOP      28
+#define A5XX_INT_GPMU_FIRMWARE           29
+#define A5XX_INT_ISDB_CPU_IRQ            30
+#define A5XX_INT_ISDB_UNDER_DEBUG        31
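+
+/*
+ * The values above are bit positions, presumably within
+ * A5XX_RBBM_INT_0_STATUS and A5XX_RBBM_INT_0_MASK; a handler would typically
+ * test them with something like (status & BIT(A5XX_INT_CP_HW_ERROR)).
+ */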
+
+/* CP Interrupt bits */
+#define A5XX_CP_OPCODE_ERROR               0
+#define A5XX_CP_RESERVED_BIT_ERROR         1
+#define A5XX_CP_HW_FAULT_ERROR             2
+#define A5XX_CP_DMA_ERROR                  3
+#define A5XX_CP_REGISTER_PROTECTION_ERROR  4
+#define A5XX_CP_AHB_ERROR                  5
+
+/* CP registers */
+#define A5XX_CP_RB_BASE                  0x800
+#define A5XX_CP_RB_BASE_HI               0x801
+#define A5XX_CP_RB_CNTL                  0x802
+#define A5XX_CP_RB_RPTR_ADDR_LO          0x804
+#define A5XX_CP_RB_RPTR_ADDR_HI          0x805
+#define A5XX_CP_RB_RPTR                  0x806
+#define A5XX_CP_RB_WPTR                  0x807
+#define A5XX_CP_PFP_STAT_ADDR            0x808
+#define A5XX_CP_PFP_STAT_DATA            0x809
+#define A5XX_CP_DRAW_STATE_ADDR          0x80B
+#define A5XX_CP_DRAW_STATE_DATA          0x80C
+#define A5XX_CP_CRASH_SCRIPT_BASE_LO     0x817
+#define A5XX_CP_CRASH_SCRIPT_BASE_HI     0x818
+#define A5XX_CP_CRASH_DUMP_CNTL          0x819
+#define A5XX_CP_ME_STAT_ADDR             0x81A
+#define A5XX_CP_ROQ_THRESHOLDS_1         0x81F
+#define A5XX_CP_ROQ_THRESHOLDS_2         0x820
+#define A5XX_CP_ROQ_DBG_ADDR             0x821
+#define A5XX_CP_ROQ_DBG_DATA             0x822
+#define A5XX_CP_MEQ_DBG_ADDR             0x823
+#define A5XX_CP_MEQ_DBG_DATA             0x824
+#define A5XX_CP_MEQ_THRESHOLDS           0x825
+#define A5XX_CP_MERCIU_SIZE              0x826
+#define A5XX_CP_MERCIU_DBG_ADDR          0x827
+#define A5XX_CP_MERCIU_DBG_DATA_1        0x828
+#define A5XX_CP_MERCIU_DBG_DATA_2        0x829
+#define A5XX_CP_PFP_UCODE_DBG_ADDR       0x82A
+#define A5XX_CP_PFP_UCODE_DBG_DATA       0x82B
+#define A5XX_CP_ME_UCODE_DBG_ADDR        0x82F
+#define A5XX_CP_ME_UCODE_DBG_DATA        0x830
+#define A5XX_CP_CNTL                     0x831
+#define A5XX_CP_ME_CNTL                  0x832
+#define A5XX_CP_CHICKEN_DBG              0x833
+#define A5XX_CP_PFP_INSTR_BASE_LO        0x835
+#define A5XX_CP_PFP_INSTR_BASE_HI        0x836
+#define A5XX_CP_PM4_INSTR_BASE_LO        0x838
+#define A5XX_CP_PM4_INSTR_BASE_HI        0x839
+#define A5XX_CP_CONTEXT_SWITCH_CNTL      0x83B
+#define A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO   0x83C
+#define A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI   0x83D
+#define A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO   0x83E
+#define A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_HI   0x83F
+#define A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO   0x840
+#define A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI   0x841
+#define A5XX_CP_ADDR_MODE_CNTL           0x860
+#define A5XX_CP_ME_STAT_DATA             0xB14
+#define A5XX_CP_WFI_PEND_CTR             0xB15
+#define A5XX_CP_INTERRUPT_STATUS         0xB18
+#define A5XX_CP_HW_FAULT                 0xB1A
+#define A5XX_CP_PROTECT_STATUS           0xB1C
+#define A5XX_CP_IB1_BASE                 0xB1F
+#define A5XX_CP_IB1_BASE_HI              0xB20
+#define A5XX_CP_IB1_BUFSZ                0xB21
+#define A5XX_CP_IB2_BASE                 0xB22
+#define A5XX_CP_IB2_BASE_HI              0xB23
+#define A5XX_CP_IB2_BUFSZ                0xB24
+#define A5XX_CP_PROTECT_REG_0            0x880
+#define A5XX_CP_PROTECT_CNTL             0x8A0
+#define A5XX_CP_AHB_FAULT                0xB1B
+#define A5XX_CP_PERFCTR_CP_SEL_0         0xBB0
+#define A5XX_CP_PERFCTR_CP_SEL_1         0xBB1
+#define A5XX_CP_PERFCTR_CP_SEL_2         0xBB2
+#define A5XX_CP_PERFCTR_CP_SEL_3         0xBB3
+#define A5XX_CP_PERFCTR_CP_SEL_4         0xBB4
+#define A5XX_CP_PERFCTR_CP_SEL_5         0xBB5
+#define A5XX_CP_PERFCTR_CP_SEL_6         0xBB6
+#define A5XX_CP_PERFCTR_CP_SEL_7         0xBB7
+
+#define A5XX_VSC_ADDR_MODE_CNTL          0xBC1
+
+/* CP Power Counter Registers Select */
+#define A5XX_CP_POWERCTR_CP_SEL_0        0xBBA
+#define A5XX_CP_POWERCTR_CP_SEL_1        0xBBB
+#define A5XX_CP_POWERCTR_CP_SEL_2        0xBBC
+#define A5XX_CP_POWERCTR_CP_SEL_3        0xBBD
+
+/* RBBM registers */
+#define A5XX_RBBM_CFG_DBGBUS_SEL_A               0x4
+#define A5XX_RBBM_CFG_DBGBUS_SEL_B               0x5
+#define A5XX_RBBM_CFG_DBGBUS_SEL_C               0x6
+#define A5XX_RBBM_CFG_DBGBUS_SEL_D               0x7
+#define A5XX_RBBM_CFG_DBGBUS_SEL_PING_INDEX_SHIFT    0x0
+#define A5XX_RBBM_CFG_DBGBUS_SEL_PING_BLK_SEL_SHIFT  0x8
+
+#define A5XX_RBBM_CFG_DBGBUS_CNTLT               0x8
+#define A5XX_RBBM_CFG_DBGBUS_CNTLM               0x9
+#define A5XX_RBBM_CFG_DEBBUS_CTLTM_ENABLE_SHIFT  0x18
+#define A5XX_RBBM_CFG_DBGBUS_OPL                 0xA
+#define A5XX_RBBM_CFG_DBGBUS_OPE                 0xB
+#define A5XX_RBBM_CFG_DBGBUS_IVTL_0              0xC
+#define A5XX_RBBM_CFG_DBGBUS_IVTL_1              0xD
+#define A5XX_RBBM_CFG_DBGBUS_IVTL_2              0xE
+#define A5XX_RBBM_CFG_DBGBUS_IVTL_3              0xF
+#define A5XX_RBBM_CFG_DBGBUS_MASKL_0             0x10
+#define A5XX_RBBM_CFG_DBGBUS_MASKL_1             0x11
+#define A5XX_RBBM_CFG_DBGBUS_MASKL_2             0x12
+#define A5XX_RBBM_CFG_DBGBUS_MASKL_3             0x13
+#define A5XX_RBBM_CFG_DBGBUS_BYTEL_0             0x14
+#define A5XX_RBBM_CFG_DBGBUS_BYTEL_1             0x15
+#define A5XX_RBBM_CFG_DBGBUS_IVTE_0              0x16
+#define A5XX_RBBM_CFG_DBGBUS_IVTE_1              0x17
+#define A5XX_RBBM_CFG_DBGBUS_IVTE_2              0x18
+#define A5XX_RBBM_CFG_DBGBUS_IVTE_3              0x19
+#define A5XX_RBBM_CFG_DBGBUS_MASKE_0             0x1A
+#define A5XX_RBBM_CFG_DBGBUS_MASKE_1             0x1B
+#define A5XX_RBBM_CFG_DBGBUS_MASKE_2             0x1C
+#define A5XX_RBBM_CFG_DBGBUS_MASKE_3             0x1D
+#define A5XX_RBBM_CFG_DBGBUS_NIBBLEE             0x1E
+#define A5XX_RBBM_CFG_DBGBUS_PTRC0               0x1F
+#define A5XX_RBBM_CFG_DBGBUS_PTRC1               0x20
+#define A5XX_RBBM_CFG_DBGBUS_LOADREG             0x21
+#define A5XX_RBBM_CFG_DBGBUS_IDX                 0x22
+#define A5XX_RBBM_CFG_DBGBUS_CLRC                0x23
+#define A5XX_RBBM_CFG_DBGBUS_LOADIVT             0x24
+#define A5XX_RBBM_INTERFACE_HANG_INT_CNTL        0x2F
+#define A5XX_RBBM_INT_CLEAR_CMD                  0x37
+#define A5XX_RBBM_INT_0_MASK                     0x38
+#define A5XX_RBBM_AHB_DBG_CNTL                   0x3F
+#define A5XX_RBBM_EXT_VBIF_DBG_CNTL              0x41
+#define A5XX_RBBM_SW_RESET_CMD                   0x43
+#define A5XX_RBBM_BLOCK_SW_RESET_CMD             0x45
+#define A5XX_RBBM_BLOCK_SW_RESET_CMD2            0x46
+#define A5XX_RBBM_DBG_LO_HI_GPIO                 0x48
+#define A5XX_RBBM_EXT_TRACE_BUS_CNTL             0x49
+#define A5XX_RBBM_CLOCK_CNTL_TP0                 0x4A
+#define A5XX_RBBM_CLOCK_CNTL_TP1                 0x4B
+#define A5XX_RBBM_CLOCK_CNTL_TP2                 0x4C
+#define A5XX_RBBM_CLOCK_CNTL_TP3                 0x4D
+#define A5XX_RBBM_CLOCK_CNTL2_TP0                0x4E
+#define A5XX_RBBM_CLOCK_CNTL2_TP1                0x4F
+#define A5XX_RBBM_CLOCK_CNTL2_TP2                0x50
+#define A5XX_RBBM_CLOCK_CNTL2_TP3                0x51
+#define A5XX_RBBM_CLOCK_CNTL3_TP0                0x52
+#define A5XX_RBBM_CLOCK_CNTL3_TP1                0x53
+#define A5XX_RBBM_CLOCK_CNTL3_TP2                0x54
+#define A5XX_RBBM_CLOCK_CNTL3_TP3                0x55
+#define A5XX_RBBM_READ_AHB_THROUGH_DBG           0x59
+#define A5XX_RBBM_CLOCK_CNTL_UCHE                0x5A
+#define A5XX_RBBM_CLOCK_CNTL2_UCHE               0x5B
+#define A5XX_RBBM_CLOCK_CNTL3_UCHE               0x5C
+#define A5XX_RBBM_CLOCK_CNTL4_UCHE               0x5D
+#define A5XX_RBBM_CLOCK_HYST_UCHE                0x5E
+#define A5XX_RBBM_CLOCK_DELAY_UCHE               0x5F
+#define A5XX_RBBM_CLOCK_MODE_GPC                 0x60
+#define A5XX_RBBM_CLOCK_DELAY_GPC                0x61
+#define A5XX_RBBM_CLOCK_HYST_GPC                 0x62
+#define A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM        0x63
+#define A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM        0x64
+#define A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM       0x65
+#define A5XX_RBBM_CLOCK_DELAY_HLSQ               0x66
+#define A5XX_RBBM_CLOCK_CNTL                     0x67
+#define A5XX_RBBM_CLOCK_CNTL_SP0                 0x68
+#define A5XX_RBBM_CLOCK_CNTL_SP1                 0x69
+#define A5XX_RBBM_CLOCK_CNTL_SP2                 0x6A
+#define A5XX_RBBM_CLOCK_CNTL_SP3                 0x6B
+#define A5XX_RBBM_CLOCK_CNTL2_SP0                0x6C
+#define A5XX_RBBM_CLOCK_CNTL2_SP1                0x6D
+#define A5XX_RBBM_CLOCK_CNTL2_SP2                0x6E
+#define A5XX_RBBM_CLOCK_CNTL2_SP3                0x6F
+#define A5XX_RBBM_CLOCK_HYST_SP0                 0x70
+#define A5XX_RBBM_CLOCK_HYST_SP1                 0x71
+#define A5XX_RBBM_CLOCK_HYST_SP2                 0x72
+#define A5XX_RBBM_CLOCK_HYST_SP3                 0x73
+#define A5XX_RBBM_CLOCK_DELAY_SP0                0x74
+#define A5XX_RBBM_CLOCK_DELAY_SP1                0x75
+#define A5XX_RBBM_CLOCK_DELAY_SP2                0x76
+#define A5XX_RBBM_CLOCK_DELAY_SP3                0x77
+#define A5XX_RBBM_CLOCK_CNTL_RB0                 0x78
+#define A5XX_RBBM_CLOCK_CNTL_RB1                 0x79
+#define A5XX_RBBM_CLOCK_CNTL_RB2                 0x7a
+#define A5XX_RBBM_CLOCK_CNTL_RB3                 0x7B
+#define A5XX_RBBM_CLOCK_CNTL2_RB0                0x7C
+#define A5XX_RBBM_CLOCK_CNTL2_RB1                0x7D
+#define A5XX_RBBM_CLOCK_CNTL2_RB2                0x7E
+#define A5XX_RBBM_CLOCK_CNTL2_RB3                0x7F
+#define A5XX_RBBM_CLOCK_HYST_RAC                 0x80
+#define A5XX_RBBM_CLOCK_DELAY_RAC                0x81
+#define A5XX_RBBM_CLOCK_CNTL_CCU0                0x82
+#define A5XX_RBBM_CLOCK_CNTL_CCU1                0x83
+#define A5XX_RBBM_CLOCK_CNTL_CCU2                0x84
+#define A5XX_RBBM_CLOCK_CNTL_CCU3                0x85
+#define A5XX_RBBM_CLOCK_HYST_RB_CCU0             0x86
+#define A5XX_RBBM_CLOCK_HYST_RB_CCU1             0x87
+#define A5XX_RBBM_CLOCK_HYST_RB_CCU2             0x88
+#define A5XX_RBBM_CLOCK_HYST_RB_CCU3             0x89
+#define A5XX_RBBM_CLOCK_CNTL_RAC                 0x8A
+#define A5XX_RBBM_CLOCK_CNTL2_RAC                0x8B
+#define A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0        0x8C
+#define A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1        0x8D
+#define A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2        0x8E
+#define A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3        0x8F
+#define A5XX_RBBM_CLOCK_HYST_VFD                 0x90
+#define A5XX_RBBM_CLOCK_MODE_VFD                 0x91
+#define A5XX_RBBM_CLOCK_DELAY_VFD                0x92
+#define A5XX_RBBM_AHB_CNTL0                      0x93
+#define A5XX_RBBM_AHB_CNTL1                      0x94
+#define A5XX_RBBM_AHB_CNTL2                      0x95
+#define A5XX_RBBM_AHB_CMD                        0x96
+#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11     0x9C
+#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12     0x9D
+#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13     0x9E
+#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14     0x9F
+#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15     0xA0
+#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16     0xA1
+#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17     0xA2
+#define A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18     0xA3
+#define A5XX_RBBM_CLOCK_DELAY_TP0                0xA4
+#define A5XX_RBBM_CLOCK_DELAY_TP1                0xA5
+#define A5XX_RBBM_CLOCK_DELAY_TP2                0xA6
+#define A5XX_RBBM_CLOCK_DELAY_TP3                0xA7
+#define A5XX_RBBM_CLOCK_DELAY2_TP0               0xA8
+#define A5XX_RBBM_CLOCK_DELAY2_TP1               0xA9
+#define A5XX_RBBM_CLOCK_DELAY2_TP2               0xAA
+#define A5XX_RBBM_CLOCK_DELAY2_TP3               0xAB
+#define A5XX_RBBM_CLOCK_DELAY3_TP0               0xAC
+#define A5XX_RBBM_CLOCK_DELAY3_TP1               0xAD
+#define A5XX_RBBM_CLOCK_DELAY3_TP2               0xAE
+#define A5XX_RBBM_CLOCK_DELAY3_TP3               0xAF
+#define A5XX_RBBM_CLOCK_HYST_TP0                 0xB0
+#define A5XX_RBBM_CLOCK_HYST_TP1                 0xB1
+#define A5XX_RBBM_CLOCK_HYST_TP2                 0xB2
+#define A5XX_RBBM_CLOCK_HYST_TP3                 0xB3
+#define A5XX_RBBM_CLOCK_HYST2_TP0                0xB4
+#define A5XX_RBBM_CLOCK_HYST2_TP1                0xB5
+#define A5XX_RBBM_CLOCK_HYST2_TP2                0xB6
+#define A5XX_RBBM_CLOCK_HYST2_TP3                0xB7
+#define A5XX_RBBM_CLOCK_HYST3_TP0                0xB8
+#define A5XX_RBBM_CLOCK_HYST3_TP1                0xB9
+#define A5XX_RBBM_CLOCK_HYST3_TP2                0xBA
+#define A5XX_RBBM_CLOCK_HYST3_TP3                0xBB
+#define A5XX_RBBM_CLOCK_CNTL_GPMU                0xC8
+#define A5XX_RBBM_CLOCK_DELAY_GPMU               0xC9
+#define A5XX_RBBM_CLOCK_HYST_GPMU                0xCA
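+
+/*
+ * Each RBBM performance counter below is split across a _LO/_HI register
+ * pair; a full 64-bit count can be assembled as ((u64)hi << 32) | lo.
+ */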
+#define A5XX_RBBM_PERFCTR_CP_0_LO                0x3A0
+#define A5XX_RBBM_PERFCTR_CP_0_HI                0x3A1
+#define A5XX_RBBM_PERFCTR_CP_1_LO                0x3A2
+#define A5XX_RBBM_PERFCTR_CP_1_HI                0x3A3
+#define A5XX_RBBM_PERFCTR_CP_2_LO                0x3A4
+#define A5XX_RBBM_PERFCTR_CP_2_HI                0x3A5
+#define A5XX_RBBM_PERFCTR_CP_3_LO                0x3A6
+#define A5XX_RBBM_PERFCTR_CP_3_HI                0x3A7
+#define A5XX_RBBM_PERFCTR_CP_4_LO                0x3A8
+#define A5XX_RBBM_PERFCTR_CP_4_HI                0x3A9
+#define A5XX_RBBM_PERFCTR_CP_5_LO                0x3AA
+#define A5XX_RBBM_PERFCTR_CP_5_HI                0x3AB
+#define A5XX_RBBM_PERFCTR_CP_6_LO                0x3AC
+#define A5XX_RBBM_PERFCTR_CP_6_HI                0x3AD
+#define A5XX_RBBM_PERFCTR_CP_7_LO                0x3AE
+#define A5XX_RBBM_PERFCTR_CP_7_HI                0x3AF
+#define A5XX_RBBM_PERFCTR_RBBM_0_LO              0x3B0
+#define A5XX_RBBM_PERFCTR_RBBM_0_HI              0x3B1
+#define A5XX_RBBM_PERFCTR_RBBM_1_LO              0x3B2
+#define A5XX_RBBM_PERFCTR_RBBM_1_HI              0x3B3
+#define A5XX_RBBM_PERFCTR_RBBM_2_LO              0x3B4
+#define A5XX_RBBM_PERFCTR_RBBM_2_HI              0x3B5
+#define A5XX_RBBM_PERFCTR_RBBM_3_LO              0x3B6
+#define A5XX_RBBM_PERFCTR_RBBM_3_HI              0x3B7
+#define A5XX_RBBM_PERFCTR_PC_0_LO                0x3B8
+#define A5XX_RBBM_PERFCTR_PC_0_HI                0x3B9
+#define A5XX_RBBM_PERFCTR_PC_1_LO                0x3BA
+#define A5XX_RBBM_PERFCTR_PC_1_HI                0x3BB
+#define A5XX_RBBM_PERFCTR_PC_2_LO                0x3BC
+#define A5XX_RBBM_PERFCTR_PC_2_HI                0x3BD
+#define A5XX_RBBM_PERFCTR_PC_3_LO                0x3BE
+#define A5XX_RBBM_PERFCTR_PC_3_HI                0x3BF
+#define A5XX_RBBM_PERFCTR_PC_4_LO                0x3C0
+#define A5XX_RBBM_PERFCTR_PC_4_HI                0x3C1
+#define A5XX_RBBM_PERFCTR_PC_5_LO                0x3C2
+#define A5XX_RBBM_PERFCTR_PC_5_HI                0x3C3
+#define A5XX_RBBM_PERFCTR_PC_6_LO                0x3C4
+#define A5XX_RBBM_PERFCTR_PC_6_HI                0x3C5
+#define A5XX_RBBM_PERFCTR_PC_7_LO                0x3C6
+#define A5XX_RBBM_PERFCTR_PC_7_HI                0x3C7
+#define A5XX_RBBM_PERFCTR_VFD_0_LO               0x3C8
+#define A5XX_RBBM_PERFCTR_VFD_0_HI               0x3C9
+#define A5XX_RBBM_PERFCTR_VFD_1_LO               0x3CA
+#define A5XX_RBBM_PERFCTR_VFD_1_HI               0x3CB
+#define A5XX_RBBM_PERFCTR_VFD_2_LO               0x3CC
+#define A5XX_RBBM_PERFCTR_VFD_2_HI               0x3CD
+#define A5XX_RBBM_PERFCTR_VFD_3_LO               0x3CE
+#define A5XX_RBBM_PERFCTR_VFD_3_HI               0x3CF
+#define A5XX_RBBM_PERFCTR_VFD_4_LO               0x3D0
+#define A5XX_RBBM_PERFCTR_VFD_4_HI               0x3D1
+#define A5XX_RBBM_PERFCTR_VFD_5_LO               0x3D2
+#define A5XX_RBBM_PERFCTR_VFD_5_HI               0x3D3
+#define A5XX_RBBM_PERFCTR_VFD_6_LO               0x3D4
+#define A5XX_RBBM_PERFCTR_VFD_6_HI               0x3D5
+#define A5XX_RBBM_PERFCTR_VFD_7_LO               0x3D6
+#define A5XX_RBBM_PERFCTR_VFD_7_HI               0x3D7
+#define A5XX_RBBM_PERFCTR_HLSQ_0_LO              0x3D8
+#define A5XX_RBBM_PERFCTR_HLSQ_0_HI              0x3D9
+#define A5XX_RBBM_PERFCTR_HLSQ_1_LO              0x3DA
+#define A5XX_RBBM_PERFCTR_HLSQ_1_HI              0x3DB
+#define A5XX_RBBM_PERFCTR_HLSQ_2_LO              0x3DC
+#define A5XX_RBBM_PERFCTR_HLSQ_2_HI              0x3DD
+#define A5XX_RBBM_PERFCTR_HLSQ_3_LO              0x3DE
+#define A5XX_RBBM_PERFCTR_HLSQ_3_HI              0x3DF
+#define A5XX_RBBM_PERFCTR_HLSQ_4_LO              0x3E0
+#define A5XX_RBBM_PERFCTR_HLSQ_4_HI              0x3E1
+#define A5XX_RBBM_PERFCTR_HLSQ_5_LO              0x3E2
+#define A5XX_RBBM_PERFCTR_HLSQ_5_HI              0x3E3
+#define A5XX_RBBM_PERFCTR_HLSQ_6_LO              0x3E4
+#define A5XX_RBBM_PERFCTR_HLSQ_6_HI              0x3E5
+#define A5XX_RBBM_PERFCTR_HLSQ_7_LO              0x3E6
+#define A5XX_RBBM_PERFCTR_HLSQ_7_HI              0x3E7
+#define A5XX_RBBM_PERFCTR_VPC_0_LO               0x3E8
+#define A5XX_RBBM_PERFCTR_VPC_0_HI               0x3E9
+#define A5XX_RBBM_PERFCTR_VPC_1_LO               0x3EA
+#define A5XX_RBBM_PERFCTR_VPC_1_HI               0x3EB
+#define A5XX_RBBM_PERFCTR_VPC_2_LO               0x3EC
+#define A5XX_RBBM_PERFCTR_VPC_2_HI               0x3ED
+#define A5XX_RBBM_PERFCTR_VPC_3_LO               0x3EE
+#define A5XX_RBBM_PERFCTR_VPC_3_HI               0x3EF
+#define A5XX_RBBM_PERFCTR_CCU_0_LO               0x3F0
+#define A5XX_RBBM_PERFCTR_CCU_0_HI               0x3F1
+#define A5XX_RBBM_PERFCTR_CCU_1_LO               0x3F2
+#define A5XX_RBBM_PERFCTR_CCU_1_HI               0x3F3
+#define A5XX_RBBM_PERFCTR_CCU_2_LO               0x3F4
+#define A5XX_RBBM_PERFCTR_CCU_2_HI               0x3F5
+#define A5XX_RBBM_PERFCTR_CCU_3_LO               0x3F6
+#define A5XX_RBBM_PERFCTR_CCU_3_HI               0x3F7
+#define A5XX_RBBM_PERFCTR_TSE_0_LO               0x3F8
+#define A5XX_RBBM_PERFCTR_TSE_0_HI               0x3F9
+#define A5XX_RBBM_PERFCTR_TSE_1_LO               0x3FA
+#define A5XX_RBBM_PERFCTR_TSE_1_HI               0x3FB
+#define A5XX_RBBM_PERFCTR_TSE_2_LO               0x3FC
+#define A5XX_RBBM_PERFCTR_TSE_2_HI               0x3FD
+#define A5XX_RBBM_PERFCTR_TSE_3_LO               0x3FE
+#define A5XX_RBBM_PERFCTR_TSE_3_HI               0x3FF
+#define A5XX_RBBM_PERFCTR_RAS_0_LO               0x400
+#define A5XX_RBBM_PERFCTR_RAS_0_HI               0x401
+#define A5XX_RBBM_PERFCTR_RAS_1_LO               0x402
+#define A5XX_RBBM_PERFCTR_RAS_1_HI               0x403
+#define A5XX_RBBM_PERFCTR_RAS_2_LO               0x404
+#define A5XX_RBBM_PERFCTR_RAS_2_HI               0x405
+#define A5XX_RBBM_PERFCTR_RAS_3_LO               0x406
+#define A5XX_RBBM_PERFCTR_RAS_3_HI               0x407
+#define A5XX_RBBM_PERFCTR_UCHE_0_LO              0x408
+#define A5XX_RBBM_PERFCTR_UCHE_0_HI              0x409
+#define A5XX_RBBM_PERFCTR_UCHE_1_LO              0x40A
+#define A5XX_RBBM_PERFCTR_UCHE_1_HI              0x40B
+#define A5XX_RBBM_PERFCTR_UCHE_2_LO              0x40C
+#define A5XX_RBBM_PERFCTR_UCHE_2_HI              0x40D
+#define A5XX_RBBM_PERFCTR_UCHE_3_LO              0x40E
+#define A5XX_RBBM_PERFCTR_UCHE_3_HI              0x40F
+#define A5XX_RBBM_PERFCTR_UCHE_4_LO              0x410
+#define A5XX_RBBM_PERFCTR_UCHE_4_HI              0x411
+#define A5XX_RBBM_PERFCTR_UCHE_5_LO              0x412
+#define A5XX_RBBM_PERFCTR_UCHE_5_HI              0x413
+#define A5XX_RBBM_PERFCTR_UCHE_6_LO              0x414
+#define A5XX_RBBM_PERFCTR_UCHE_6_HI              0x415
+#define A5XX_RBBM_PERFCTR_UCHE_7_LO              0x416
+#define A5XX_RBBM_PERFCTR_UCHE_7_HI              0x417
+#define A5XX_RBBM_PERFCTR_TP_0_LO                0x418
+#define A5XX_RBBM_PERFCTR_TP_0_HI                0x419
+#define A5XX_RBBM_PERFCTR_TP_1_LO                0x41A
+#define A5XX_RBBM_PERFCTR_TP_1_HI                0x41B
+#define A5XX_RBBM_PERFCTR_TP_2_LO                0x41C
+#define A5XX_RBBM_PERFCTR_TP_2_HI                0x41D
+#define A5XX_RBBM_PERFCTR_TP_3_LO                0x41E
+#define A5XX_RBBM_PERFCTR_TP_3_HI                0x41F
+#define A5XX_RBBM_PERFCTR_TP_4_LO                0x420
+#define A5XX_RBBM_PERFCTR_TP_4_HI                0x421
+#define A5XX_RBBM_PERFCTR_TP_5_LO                0x422
+#define A5XX_RBBM_PERFCTR_TP_5_HI                0x423
+#define A5XX_RBBM_PERFCTR_TP_6_LO                0x424
+#define A5XX_RBBM_PERFCTR_TP_6_HI                0x425
+#define A5XX_RBBM_PERFCTR_TP_7_LO                0x426
+#define A5XX_RBBM_PERFCTR_TP_7_HI                0x427
+#define A5XX_RBBM_PERFCTR_SP_0_LO                0x428
+#define A5XX_RBBM_PERFCTR_SP_0_HI                0x429
+#define A5XX_RBBM_PERFCTR_SP_1_LO                0x42A
+#define A5XX_RBBM_PERFCTR_SP_1_HI                0x42B
+#define A5XX_RBBM_PERFCTR_SP_2_LO                0x42C
+#define A5XX_RBBM_PERFCTR_SP_2_HI                0x42D
+#define A5XX_RBBM_PERFCTR_SP_3_LO                0x42E
+#define A5XX_RBBM_PERFCTR_SP_3_HI                0x42F
+#define A5XX_RBBM_PERFCTR_SP_4_LO                0x430
+#define A5XX_RBBM_PERFCTR_SP_4_HI                0x431
+#define A5XX_RBBM_PERFCTR_SP_5_LO                0x432
+#define A5XX_RBBM_PERFCTR_SP_5_HI                0x433
+#define A5XX_RBBM_PERFCTR_SP_6_LO                0x434
+#define A5XX_RBBM_PERFCTR_SP_6_HI                0x435
+#define A5XX_RBBM_PERFCTR_SP_7_LO                0x436
+#define A5XX_RBBM_PERFCTR_SP_7_HI                0x437
+#define A5XX_RBBM_PERFCTR_SP_8_LO                0x438
+#define A5XX_RBBM_PERFCTR_SP_8_HI                0x439
+#define A5XX_RBBM_PERFCTR_SP_9_LO                0x43A
+#define A5XX_RBBM_PERFCTR_SP_9_HI                0x43B
+#define A5XX_RBBM_PERFCTR_SP_10_LO               0x43C
+#define A5XX_RBBM_PERFCTR_SP_10_HI               0x43D
+#define A5XX_RBBM_PERFCTR_SP_11_LO               0x43E
+#define A5XX_RBBM_PERFCTR_SP_11_HI               0x43F
+#define A5XX_RBBM_PERFCTR_RB_0_LO                0x440
+#define A5XX_RBBM_PERFCTR_RB_0_HI                0x441
+#define A5XX_RBBM_PERFCTR_RB_1_LO                0x442
+#define A5XX_RBBM_PERFCTR_RB_1_HI                0x443
+#define A5XX_RBBM_PERFCTR_RB_2_LO                0x444
+#define A5XX_RBBM_PERFCTR_RB_2_HI                0x445
+#define A5XX_RBBM_PERFCTR_RB_3_LO                0x446
+#define A5XX_RBBM_PERFCTR_RB_3_HI                0x447
+#define A5XX_RBBM_PERFCTR_RB_4_LO                0x448
+#define A5XX_RBBM_PERFCTR_RB_4_HI                0x449
+#define A5XX_RBBM_PERFCTR_RB_5_LO                0x44A
+#define A5XX_RBBM_PERFCTR_RB_5_HI                0x44B
+#define A5XX_RBBM_PERFCTR_RB_6_LO                0x44C
+#define A5XX_RBBM_PERFCTR_RB_6_HI                0x44D
+#define A5XX_RBBM_PERFCTR_RB_7_LO                0x44E
+#define A5XX_RBBM_PERFCTR_RB_7_HI                0x44F
+#define A5XX_RBBM_PERFCTR_VSC_0_LO               0x450
+#define A5XX_RBBM_PERFCTR_VSC_0_HI               0x451
+#define A5XX_RBBM_PERFCTR_VSC_1_LO               0x452
+#define A5XX_RBBM_PERFCTR_VSC_1_HI               0x453
+#define A5XX_RBBM_PERFCTR_LRZ_0_LO               0x454
+#define A5XX_RBBM_PERFCTR_LRZ_0_HI               0x455
+#define A5XX_RBBM_PERFCTR_LRZ_1_LO               0x456
+#define A5XX_RBBM_PERFCTR_LRZ_1_HI               0x457
+#define A5XX_RBBM_PERFCTR_LRZ_2_LO               0x458
+#define A5XX_RBBM_PERFCTR_LRZ_2_HI               0x459
+#define A5XX_RBBM_PERFCTR_LRZ_3_LO               0x45A
+#define A5XX_RBBM_PERFCTR_LRZ_3_HI               0x45B
+#define A5XX_RBBM_PERFCTR_CMP_0_LO               0x45C
+#define A5XX_RBBM_PERFCTR_CMP_0_HI               0x45D
+#define A5XX_RBBM_PERFCTR_CMP_1_LO               0x45E
+#define A5XX_RBBM_PERFCTR_CMP_1_HI               0x45F
+#define A5XX_RBBM_PERFCTR_CMP_2_LO               0x460
+#define A5XX_RBBM_PERFCTR_CMP_2_HI               0x461
+#define A5XX_RBBM_PERFCTR_CMP_3_LO               0x462
+#define A5XX_RBBM_PERFCTR_CMP_3_HI               0x463
+#define A5XX_RBBM_ALWAYSON_COUNTER_LO            0x4D2
+#define A5XX_RBBM_ALWAYSON_COUNTER_HI            0x4D3
+#define A5XX_RBBM_STATUS                         0x4F5
+#define A5XX_RBBM_STATUS3                        0x530
+#define A5XX_RBBM_INT_0_STATUS                   0x4E1
+#define A5XX_RBBM_AHB_ME_SPLIT_STATUS            0x4F0
+#define A5XX_RBBM_AHB_PFP_SPLIT_STATUS           0x4F1
+#define A5XX_RBBM_AHB_ERROR_STATUS               0x4F4
+#define A5XX_RBBM_PERFCTR_CNTL                   0x464
+#define A5XX_RBBM_PERFCTR_LOAD_CMD0              0x465
+#define A5XX_RBBM_PERFCTR_LOAD_CMD1              0x466
+#define A5XX_RBBM_PERFCTR_LOAD_CMD2              0x467
+#define A5XX_RBBM_PERFCTR_LOAD_CMD3              0x468
+#define A5XX_RBBM_PERFCTR_LOAD_VALUE_LO          0x469
+#define A5XX_RBBM_PERFCTR_LOAD_VALUE_HI          0x46A
+#define A5XX_RBBM_PERFCTR_RBBM_SEL_0             0x46B
+#define A5XX_RBBM_PERFCTR_RBBM_SEL_1             0x46C
+#define A5XX_RBBM_PERFCTR_RBBM_SEL_2             0x46D
+#define A5XX_RBBM_PERFCTR_RBBM_SEL_3             0x46E
+#define A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED        0x46F
+#define A5XX_RBBM_CFG_DBGBUS_EVENT_LOGIC         0x504
+#define A5XX_RBBM_CFG_DBGBUS_OVER                0x505
+#define A5XX_RBBM_CFG_DBGBUS_COUNT0              0x506
+#define A5XX_RBBM_CFG_DBGBUS_COUNT1              0x507
+#define A5XX_RBBM_CFG_DBGBUS_COUNT2              0x508
+#define A5XX_RBBM_CFG_DBGBUS_COUNT3              0x509
+#define A5XX_RBBM_CFG_DBGBUS_COUNT4              0x50A
+#define A5XX_RBBM_CFG_DBGBUS_COUNT5              0x50B
+#define A5XX_RBBM_CFG_DBGBUS_TRACE_ADDR          0x50C
+#define A5XX_RBBM_CFG_DBGBUS_TRACE_BUF0          0x50D
+#define A5XX_RBBM_CFG_DBGBUS_TRACE_BUF1          0x50E
+#define A5XX_RBBM_CFG_DBGBUS_TRACE_BUF2          0x50F
+#define A5XX_RBBM_CFG_DBGBUS_TRACE_BUF3          0x510
+#define A5XX_RBBM_CFG_DBGBUS_TRACE_BUF4          0x511
+#define A5XX_RBBM_CFG_DBGBUS_MISR0               0x512
+#define A5XX_RBBM_CFG_DBGBUS_MISR1               0x513
+#define A5XX_RBBM_ISDB_CNT                       0x533
+#define A5XX_RBBM_SECVID_TRUST_CONFIG            0xF000
+#define A5XX_RBBM_SECVID_TRUST_CNTL              0xF400
+#define A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO     0xF800
+#define A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI     0xF801
+#define A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE        0xF802
+#define A5XX_RBBM_SECVID_TSB_CNTL                0xF803
+#define A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL      0xF810
+
+/* VSC registers */
+#define A5XX_VSC_PERFCTR_VSC_SEL_0          0xC60
+#define A5XX_VSC_PERFCTR_VSC_SEL_1          0xC61
+
+#define A5XX_GRAS_ADDR_MODE_CNTL            0xC81
+
+/* TSE registers */
+#define A5XX_GRAS_PERFCTR_TSE_SEL_0         0xC90
+#define A5XX_GRAS_PERFCTR_TSE_SEL_1         0xC91
+#define A5XX_GRAS_PERFCTR_TSE_SEL_2         0xC92
+#define A5XX_GRAS_PERFCTR_TSE_SEL_3         0xC93
+
+/* RAS registers */
+#define A5XX_GRAS_PERFCTR_RAS_SEL_0         0xC94
+#define A5XX_GRAS_PERFCTR_RAS_SEL_1         0xC95
+#define A5XX_GRAS_PERFCTR_RAS_SEL_2         0xC96
+#define A5XX_GRAS_PERFCTR_RAS_SEL_3         0xC97
+
+/* LRZ registers */
+#define A5XX_GRAS_PERFCTR_LRZ_SEL_0         0xC98
+#define A5XX_GRAS_PERFCTR_LRZ_SEL_1         0xC99
+#define A5XX_GRAS_PERFCTR_LRZ_SEL_2         0xC9A
+#define A5XX_GRAS_PERFCTR_LRZ_SEL_3         0xC9B
+
+
+/* RB registers */
+#define A5XX_RB_DBG_ECO_CNT                 0xCC4
+#define A5XX_RB_ADDR_MODE_CNTL              0xCC5
+#define A5XX_RB_MODE_CNTL                   0xCC6
+#define A5XX_RB_PERFCTR_RB_SEL_0            0xCD0
+#define A5XX_RB_PERFCTR_RB_SEL_1            0xCD1
+#define A5XX_RB_PERFCTR_RB_SEL_2            0xCD2
+#define A5XX_RB_PERFCTR_RB_SEL_3            0xCD3
+#define A5XX_RB_PERFCTR_RB_SEL_4            0xCD4
+#define A5XX_RB_PERFCTR_RB_SEL_5            0xCD5
+#define A5XX_RB_PERFCTR_RB_SEL_6            0xCD6
+#define A5XX_RB_PERFCTR_RB_SEL_7            0xCD7
+
+/* CCU registers */
+#define A5XX_RB_PERFCTR_CCU_SEL_0           0xCD8
+#define A5XX_RB_PERFCTR_CCU_SEL_1           0xCD9
+#define A5XX_RB_PERFCTR_CCU_SEL_2           0xCDA
+#define A5XX_RB_PERFCTR_CCU_SEL_3           0xCDB
+
+/* RB Power Counter RB Registers Select */
+#define A5XX_RB_POWERCTR_RB_SEL_0           0xCE0
+#define A5XX_RB_POWERCTR_RB_SEL_1           0xCE1
+#define A5XX_RB_POWERCTR_RB_SEL_2           0xCE2
+#define A5XX_RB_POWERCTR_RB_SEL_3           0xCE3
+
+/* RB Power Counter CCU Registers Select */
+#define A5XX_RB_POWERCTR_CCU_SEL_0          0xCE4
+#define A5XX_RB_POWERCTR_CCU_SEL_1          0xCE5
+
+/* CMP registers */
+#define A5XX_RB_PERFCTR_CMP_SEL_0           0xCEC
+#define A5XX_RB_PERFCTR_CMP_SEL_1           0xCED
+#define A5XX_RB_PERFCTR_CMP_SEL_2           0xCEE
+#define A5XX_RB_PERFCTR_CMP_SEL_3           0xCEF
+
+/* PC registers */
+#define A5XX_PC_DBG_ECO_CNTL                0xD00
+#define A5XX_PC_ADDR_MODE_CNTL              0xD01
+#define A5XX_PC_PERFCTR_PC_SEL_0            0xD10
+#define A5XX_PC_PERFCTR_PC_SEL_1            0xD11
+#define A5XX_PC_PERFCTR_PC_SEL_2            0xD12
+#define A5XX_PC_PERFCTR_PC_SEL_3            0xD13
+#define A5XX_PC_PERFCTR_PC_SEL_4            0xD14
+#define A5XX_PC_PERFCTR_PC_SEL_5            0xD15
+#define A5XX_PC_PERFCTR_PC_SEL_6            0xD16
+#define A5XX_PC_PERFCTR_PC_SEL_7            0xD17
+
+/* HLSQ registers */
+#define A5XX_HLSQ_DBG_ECO_CNTL		    0xE04
+#define A5XX_HLSQ_ADDR_MODE_CNTL            0xE05
+#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_0        0xE10
+#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_1        0xE11
+#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_2        0xE12
+#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_3        0xE13
+#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_4        0xE14
+#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_5        0xE15
+#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_6        0xE16
+#define A5XX_HLSQ_PERFCTR_HLSQ_SEL_7        0xE17
+#define A5XX_HLSQ_DBG_READ_SEL              0xBC00
+#define A5XX_HLSQ_DBG_AHB_READ_APERTURE     0xA000
+
+/* VFD registers */
+#define A5XX_VFD_ADDR_MODE_CNTL             0xE41
+#define A5XX_VFD_PERFCTR_VFD_SEL_0          0xE50
+#define A5XX_VFD_PERFCTR_VFD_SEL_1          0xE51
+#define A5XX_VFD_PERFCTR_VFD_SEL_2          0xE52
+#define A5XX_VFD_PERFCTR_VFD_SEL_3          0xE53
+#define A5XX_VFD_PERFCTR_VFD_SEL_4          0xE54
+#define A5XX_VFD_PERFCTR_VFD_SEL_5          0xE55
+#define A5XX_VFD_PERFCTR_VFD_SEL_6          0xE56
+#define A5XX_VFD_PERFCTR_VFD_SEL_7          0xE57
+
+/* VPC registers */
+#define A5XX_VPC_DBG_ECO_CNTL		    0xE60
+#define A5XX_VPC_ADDR_MODE_CNTL             0xE61
+#define A5XX_VPC_PERFCTR_VPC_SEL_0          0xE64
+#define A5XX_VPC_PERFCTR_VPC_SEL_1          0xE65
+#define A5XX_VPC_PERFCTR_VPC_SEL_2          0xE66
+#define A5XX_VPC_PERFCTR_VPC_SEL_3          0xE67
+
+/* UCHE registers */
+#define A5XX_UCHE_ADDR_MODE_CNTL            0xE80
+#define A5XX_UCHE_MODE_CNTL                 0xE81
+#define A5XX_UCHE_WRITE_THRU_BASE_LO        0xE87
+#define A5XX_UCHE_WRITE_THRU_BASE_HI        0xE88
+#define A5XX_UCHE_TRAP_BASE_LO              0xE89
+#define A5XX_UCHE_TRAP_BASE_HI              0xE8A
+#define A5XX_UCHE_GMEM_RANGE_MIN_LO         0xE8B
+#define A5XX_UCHE_GMEM_RANGE_MIN_HI         0xE8C
+#define A5XX_UCHE_GMEM_RANGE_MAX_LO         0xE8D
+#define A5XX_UCHE_GMEM_RANGE_MAX_HI         0xE8E
+#define A5XX_UCHE_DBG_ECO_CNTL_2            0xE8F
+#define A5XX_UCHE_INVALIDATE0               0xE95
+#define A5XX_UCHE_CACHE_WAYS                0xE96
+#define A5XX_UCHE_PERFCTR_UCHE_SEL_0        0xEA0
+#define A5XX_UCHE_PERFCTR_UCHE_SEL_1        0xEA1
+#define A5XX_UCHE_PERFCTR_UCHE_SEL_2        0xEA2
+#define A5XX_UCHE_PERFCTR_UCHE_SEL_3        0xEA3
+#define A5XX_UCHE_PERFCTR_UCHE_SEL_4        0xEA4
+#define A5XX_UCHE_PERFCTR_UCHE_SEL_5        0xEA5
+#define A5XX_UCHE_PERFCTR_UCHE_SEL_6        0xEA6
+#define A5XX_UCHE_PERFCTR_UCHE_SEL_7        0xEA7
+
+/* UCHE Power Counter UCHE Registers Select */
+#define A5XX_UCHE_POWERCTR_UCHE_SEL_0       0xEA8
+#define A5XX_UCHE_POWERCTR_UCHE_SEL_1       0xEA9
+#define A5XX_UCHE_POWERCTR_UCHE_SEL_2       0xEAA
+#define A5XX_UCHE_POWERCTR_UCHE_SEL_3       0xEAB
+
+/* SP registers */
+#define A5XX_SP_DBG_ECO_CNTL                0xEC0
+#define A5XX_SP_ADDR_MODE_CNTL              0xEC1
+#define A5XX_SP_PERFCTR_SP_SEL_0            0xED0
+#define A5XX_SP_PERFCTR_SP_SEL_1            0xED1
+#define A5XX_SP_PERFCTR_SP_SEL_2            0xED2
+#define A5XX_SP_PERFCTR_SP_SEL_3            0xED3
+#define A5XX_SP_PERFCTR_SP_SEL_4            0xED4
+#define A5XX_SP_PERFCTR_SP_SEL_5            0xED5
+#define A5XX_SP_PERFCTR_SP_SEL_6            0xED6
+#define A5XX_SP_PERFCTR_SP_SEL_7            0xED7
+#define A5XX_SP_PERFCTR_SP_SEL_8            0xED8
+#define A5XX_SP_PERFCTR_SP_SEL_9            0xED9
+#define A5XX_SP_PERFCTR_SP_SEL_10           0xEDA
+#define A5XX_SP_PERFCTR_SP_SEL_11           0xEDB
+
+/* SP Power Counter SP Registers Select */
+#define A5XX_SP_POWERCTR_SP_SEL_0           0xEDC
+#define A5XX_SP_POWERCTR_SP_SEL_1           0xEDD
+#define A5XX_SP_POWERCTR_SP_SEL_2           0xEDE
+#define A5XX_SP_POWERCTR_SP_SEL_3           0xEDF
+
+/* TP registers */
+#define A5XX_TPL1_ADDR_MODE_CNTL            0xF01
+#define A5XX_TPL1_MODE_CNTL                 0xF02
+#define A5XX_TPL1_PERFCTR_TP_SEL_0          0xF10
+#define A5XX_TPL1_PERFCTR_TP_SEL_1          0xF11
+#define A5XX_TPL1_PERFCTR_TP_SEL_2          0xF12
+#define A5XX_TPL1_PERFCTR_TP_SEL_3          0xF13
+#define A5XX_TPL1_PERFCTR_TP_SEL_4          0xF14
+#define A5XX_TPL1_PERFCTR_TP_SEL_5          0xF15
+#define A5XX_TPL1_PERFCTR_TP_SEL_6          0xF16
+#define A5XX_TPL1_PERFCTR_TP_SEL_7          0xF17
+
+/* TP Power Counter TP Registers Select */
+#define A5XX_TPL1_POWERCTR_TP_SEL_0         0xF18
+#define A5XX_TPL1_POWERCTR_TP_SEL_1         0xF19
+#define A5XX_TPL1_POWERCTR_TP_SEL_2         0xF1A
+#define A5XX_TPL1_POWERCTR_TP_SEL_3         0xF1B
+
+/* VBIF registers */
+#define A5XX_VBIF_VERSION                       0x3000
+#define A5XX_VBIF_CLKON                         0x3001
+#define A5XX_VBIF_CLKON_FORCE_ON_TESTBUS_MASK   0x1
+#define A5XX_VBIF_CLKON_FORCE_ON_TESTBUS_SHIFT  0x1
+
+#define A5XX_VBIF_ROUND_ROBIN_QOS_ARB      0x3049
+#define A5XX_VBIF_GATE_OFF_WRREQ_EN        0x302A
+
+#define A5XX_VBIF_XIN_HALT_CTRL0	   0x3080
+#define A5XX_VBIF_XIN_HALT_CTRL0_MASK	   0xF
+#define A510_VBIF_XIN_HALT_CTRL0_MASK	   0x7
+#define A5XX_VBIF_XIN_HALT_CTRL1	   0x3081
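+/*
+ * A510 uses a narrower XIN halt mask than other A5XX parts, presumably
+ * because it has fewer VBIF XIN ports to halt.
+ */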
+
+#define A5XX_VBIF_TEST_BUS_OUT_CTRL            0x3084
+#define A5XX_VBIF_TEST_BUS_OUT_CTRL_EN_MASK    0x1
+#define A5XX_VBIF_TEST_BUS_OUT_CTRL_EN_SHIFT   0x0
+
+#define A5XX_VBIF_TEST_BUS1_CTRL0                0x3085
+#define A5XX_VBIF_TEST_BUS1_CTRL1                0x3086
+#define A5XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_MASK  0xF
+#define A5XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_SHIFT 0x0
+
+#define A5XX_VBIF_TEST_BUS2_CTRL0                   0x3087
+#define A5XX_VBIF_TEST_BUS2_CTRL1                   0x3088
+#define A5XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_MASK     0x1FF
+#define A5XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_SHIFT    0x0
+
+#define A5XX_VBIF_TEST_BUS_OUT             0x308c
+
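+/*
+ * VBIF performance counters appear to follow a SEL/LOW/HIGH layout: a
+ * countable is programmed into PERF_CNT_SELn and the 64-bit result is read
+ * back from the matching PERF_CNT_LOWn/HIGHn pair.
+ */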
+#define A5XX_VBIF_PERF_CNT_SEL0            0x30D0
+#define A5XX_VBIF_PERF_CNT_SEL1            0x30D1
+#define A5XX_VBIF_PERF_CNT_SEL2            0x30D2
+#define A5XX_VBIF_PERF_CNT_SEL3            0x30D3
+#define A5XX_VBIF_PERF_CNT_LOW0            0x30D8
+#define A5XX_VBIF_PERF_CNT_LOW1            0x30D9
+#define A5XX_VBIF_PERF_CNT_LOW2            0x30DA
+#define A5XX_VBIF_PERF_CNT_LOW3            0x30DB
+#define A5XX_VBIF_PERF_CNT_HIGH0           0x30E0
+#define A5XX_VBIF_PERF_CNT_HIGH1           0x30E1
+#define A5XX_VBIF_PERF_CNT_HIGH2           0x30E2
+#define A5XX_VBIF_PERF_CNT_HIGH3           0x30E3
+
+#define A5XX_VBIF_PERF_PWR_CNT_EN0         0x3100
+#define A5XX_VBIF_PERF_PWR_CNT_EN1         0x3101
+#define A5XX_VBIF_PERF_PWR_CNT_EN2         0x3102
+
+#define A5XX_VBIF_PERF_PWR_CNT_LOW0        0x3110
+#define A5XX_VBIF_PERF_PWR_CNT_LOW1        0x3111
+#define A5XX_VBIF_PERF_PWR_CNT_LOW2        0x3112
+
+#define A5XX_VBIF_PERF_PWR_CNT_HIGH0       0x3118
+#define A5XX_VBIF_PERF_PWR_CNT_HIGH1       0x3119
+#define A5XX_VBIF_PERF_PWR_CNT_HIGH2       0x311A
+
+/* GPMU registers */
+#define A5XX_GPMU_INST_RAM_BASE            0x8800
+#define A5XX_GPMU_DATA_RAM_BASE            0x9800
+#define A5XX_GPMU_SP_POWER_CNTL            0xA881
+#define A5XX_GPMU_RBCCU_CLOCK_CNTL         0xA886
+#define A5XX_GPMU_RBCCU_POWER_CNTL         0xA887
+#define A5XX_GPMU_SP_PWR_CLK_STATUS        0xA88B
+#define A5XX_GPMU_RBCCU_PWR_CLK_STATUS     0xA88D
+#define A5XX_GPMU_PWR_COL_STAGGER_DELAY    0xA891
+#define A5XX_GPMU_PWR_COL_INTER_FRAME_CTRL 0xA892
+#define A5XX_GPMU_PWR_COL_INTER_FRAME_HYST 0xA893
+#define A5XX_GPMU_PWR_COL_BINNING_CTRL     0xA894
+#define A5XX_GPMU_CLOCK_THROTTLE_CTRL      0xA8A3
+#define A5XX_GPMU_WFI_CONFIG               0xA8C1
+#define A5XX_GPMU_RBBM_INTR_INFO           0xA8D6
+#define A5XX_GPMU_CM3_SYSRESET             0xA8D8
+#define A5XX_GPMU_GENERAL_0                0xA8E0
+#define A5XX_GPMU_GENERAL_1                0xA8E1
+
+/* COUNTABLE FOR SP PERFCOUNTER */
+#define A5XX_SP_ALU_ACTIVE_CYCLES          0x1
+#define A5XX_SP0_ICL1_MISSES               0x35
+#define A5XX_SP_FS_CFLOW_INSTRUCTIONS      0x27
+
+/* COUNTABLE FOR TSE PERFCOUNTER */
+#define A5XX_TSE_INPUT_PRIM_NUM            0x6
+
+/* COUNTABLE FOR RBBM PERFCOUNTER */
+#define A5XX_RBBM_ALWAYS_COUNT		0x0
+
+/* GPMU POWER COUNTERS */
+#define A5XX_SP_POWER_COUNTER_0_LO		0xA840
+#define A5XX_SP_POWER_COUNTER_0_HI		0xA841
+#define A5XX_SP_POWER_COUNTER_1_LO		0xA842
+#define A5XX_SP_POWER_COUNTER_1_HI		0xA843
+#define A5XX_SP_POWER_COUNTER_2_LO		0xA844
+#define A5XX_SP_POWER_COUNTER_2_HI		0xA845
+#define A5XX_SP_POWER_COUNTER_3_LO		0xA846
+#define A5XX_SP_POWER_COUNTER_3_HI		0xA847
+
+#define A5XX_TP_POWER_COUNTER_0_LO		0xA848
+#define A5XX_TP_POWER_COUNTER_0_HI		0xA849
+#define A5XX_TP_POWER_COUNTER_1_LO		0xA84A
+#define A5XX_TP_POWER_COUNTER_1_HI		0xA84B
+#define A5XX_TP_POWER_COUNTER_2_LO		0xA84C
+#define A5XX_TP_POWER_COUNTER_2_HI		0xA84D
+#define A5XX_TP_POWER_COUNTER_3_LO		0xA84E
+#define A5XX_TP_POWER_COUNTER_3_HI		0xA84F
+
+#define A5XX_RB_POWER_COUNTER_0_LO		0xA850
+#define A5XX_RB_POWER_COUNTER_0_HI		0xA851
+#define A5XX_RB_POWER_COUNTER_1_LO		0xA852
+#define A5XX_RB_POWER_COUNTER_1_HI		0xA853
+#define A5XX_RB_POWER_COUNTER_2_LO		0xA854
+#define A5XX_RB_POWER_COUNTER_2_HI		0xA855
+#define A5XX_RB_POWER_COUNTER_3_LO		0xA856
+#define A5XX_RB_POWER_COUNTER_3_HI		0xA857
+
+#define A5XX_CCU_POWER_COUNTER_0_LO		0xA858
+#define A5XX_CCU_POWER_COUNTER_0_HI		0xA859
+#define A5XX_CCU_POWER_COUNTER_1_LO		0xA85A
+#define A5XX_CCU_POWER_COUNTER_1_HI		0xA85B
+
+#define A5XX_UCHE_POWER_COUNTER_0_LO		0xA85C
+#define A5XX_UCHE_POWER_COUNTER_0_HI		0xA85D
+#define A5XX_UCHE_POWER_COUNTER_1_LO		0xA85E
+#define A5XX_UCHE_POWER_COUNTER_1_HI		0xA85F
+#define A5XX_UCHE_POWER_COUNTER_2_LO		0xA860
+#define A5XX_UCHE_POWER_COUNTER_2_HI		0xA861
+#define A5XX_UCHE_POWER_COUNTER_3_LO		0xA862
+#define A5XX_UCHE_POWER_COUNTER_3_HI		0xA863
+
+#define A5XX_CP_POWER_COUNTER_0_LO		0xA864
+#define A5XX_CP_POWER_COUNTER_0_HI		0xA865
+#define A5XX_CP_POWER_COUNTER_1_LO		0xA866
+#define A5XX_CP_POWER_COUNTER_1_HI		0xA867
+#define A5XX_CP_POWER_COUNTER_2_LO		0xA868
+#define A5XX_CP_POWER_COUNTER_2_HI		0xA869
+#define A5XX_CP_POWER_COUNTER_3_LO		0xA86A
+#define A5XX_CP_POWER_COUNTER_3_HI		0xA86B
+
+#define A5XX_GPMU_POWER_COUNTER_0_LO		0xA86C
+#define A5XX_GPMU_POWER_COUNTER_0_HI		0xA86D
+#define A5XX_GPMU_POWER_COUNTER_1_LO		0xA86E
+#define A5XX_GPMU_POWER_COUNTER_1_HI		0xA86F
+#define A5XX_GPMU_POWER_COUNTER_2_LO		0xA870
+#define A5XX_GPMU_POWER_COUNTER_2_HI		0xA871
+#define A5XX_GPMU_POWER_COUNTER_3_LO		0xA872
+#define A5XX_GPMU_POWER_COUNTER_3_HI		0xA873
+#define A5XX_GPMU_POWER_COUNTER_4_LO		0xA874
+#define A5XX_GPMU_POWER_COUNTER_4_HI		0xA875
+#define A5XX_GPMU_POWER_COUNTER_5_LO		0xA876
+#define A5XX_GPMU_POWER_COUNTER_5_HI		0xA877
+
+#define A5XX_GPMU_POWER_COUNTER_ENABLE		0xA878
+#define A5XX_GPMU_ALWAYS_ON_COUNTER_LO		0xA879
+#define A5XX_GPMU_ALWAYS_ON_COUNTER_HI		0xA87A
+#define A5XX_GPMU_ALWAYS_ON_COUNTER_RESET	0xA87B
+#define A5XX_GPMU_POWER_COUNTER_SELECT_0	0xA87C
+#define A5XX_GPMU_POWER_COUNTER_SELECT_1	0xA87D
+#define A5XX_GPMU_GPMU_SP_CLOCK_CONTROL		0xA880
+
+#define A5XX_GPMU_THROTTLE_UNMASK_FORCE_CTRL	0xA8A8
+
+#define A5XX_GPMU_TEMP_SENSOR_ID		0xAC00
+#define A5XX_GPMU_TEMP_SENSOR_CONFIG		0xAC01
+#define A5XX_GPMU_DELTA_TEMP_THRESHOLD		0xAC03
+#define A5XX_GPMU_TEMP_THRESHOLD_INTR_EN_MASK	0xAC06
+
+#define A5XX_GPMU_LEAKAGE_TEMP_COEFF_0_1	0xAC40
+#define A5XX_GPMU_LEAKAGE_TEMP_COEFF_2_3	0xAC41
+#define A5XX_GPMU_LEAKAGE_VTG_COEFF_0_1		0xAC42
+#define A5XX_GPMU_LEAKAGE_VTG_COEFF_2_3		0xAC43
+#define A5XX_GPMU_BASE_LEAKAGE			0xAC46
+
+#define A5XX_GPMU_GPMU_VOLTAGE			0xAC60
+#define A5XX_GPMU_GPMU_VOLTAGE_INTR_STATUS	0xAC61
+#define A5XX_GPMU_GPMU_VOLTAGE_INTR_EN_MASK	0xAC62
+#define A5XX_GPMU_GPMU_PWR_THRESHOLD		0xAC80
+#define A5XX_GPMU_GPMU_LLM_GLM_SLEEP_CTRL	0xACC4
+#define A5XX_GPMU_GPMU_LLM_GLM_SLEEP_STATUS	0xACC5
+#define A5XX_GPMU_GPMU_ISENSE_CTRL		0xACD0
+
+#define A5XX_GDPM_CONFIG1			0xB80C
+#define A5XX_GDPM_INT_EN			0xB80F
+#define A5XX_GDPM_INT_MASK			0xB811
+#define A5XX_GPMU_BEC_ENABLE			0xB9A0
+
+/* ISENSE registers */
+#define A5XX_GPU_CS_DECIMAL_ALIGN		0xC16A
+#define A5XX_GPU_CS_SENSOR_PARAM_CORE_1	0xC126
+#define A5XX_GPU_CS_SENSOR_PARAM_CORE_2	0xC127
+#define A5XX_GPU_CS_SW_OV_FUSE_EN		0xC168
+#define A5XX_GPU_CS_SENSOR_GENERAL_STATUS	0xC41A
+#define A5XX_GPU_CS_AMP_CALIBRATION_STATUS1_0	0xC41D
+#define A5XX_GPU_CS_AMP_CALIBRATION_STATUS1_2	0xC41F
+#define A5XX_GPU_CS_AMP_CALIBRATION_STATUS1_4	0xC421
+#define A5XX_GPU_CS_ENABLE_REG			0xC520
+#define A5XX_GPU_CS_AMP_CALIBRATION_CONTROL1	0xC557
+#define A5XX_GPU_CS_AMP_CALIBRATION_DONE	0xC565
+#define A5XX_GPU_CS_ENDPOINT_CALIBRATION_DONE   0xC556
+#endif /* _A5XX_REG_H */
diff --git a/drivers/gpu/msm/adreno-gpulist.h b/drivers/gpu/msm/adreno-gpulist.h
new file mode 100644
index 0000000..e616338
--- /dev/null
+++ b/drivers/gpu/msm/adreno-gpulist.h
@@ -0,0 +1,300 @@
+/* Copyright (c) 2002,2007-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#define ANY_ID (~0)
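+
+/*
+ * Entries in adreno_gpulist are expected to be matched against the probed
+ * chip ID field by field (core/major/minor/patchid); ANY_ID (~0) acts as a
+ * wildcard so one entry can cover every patch revision of a given core.
+ */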
+
+static const struct adreno_gpu_core adreno_gpulist[] = {
+	{
+		.gpurev = ADRENO_REV_A306,
+		.core = 3,
+		.major = 0,
+		.minor = 6,
+		.patchid = 0x00,
+		.pm4fw_name = "a300_pm4.fw",
+		.pfpfw_name = "a300_pfp.fw",
+		.gpudev = &adreno_a3xx_gpudev,
+		.gmem_size = SZ_128K,
+		.busy_mask = 0x7FFFFFFE,
+	},
+	{
+		.gpurev = ADRENO_REV_A306A,
+		.core = 3,
+		.major = 0,
+		.minor = 6,
+		.patchid = 0x20,
+		.pm4fw_name = "a300_pm4.fw",
+		.pfpfw_name = "a300_pfp.fw",
+		.gpudev = &adreno_a3xx_gpudev,
+		.gmem_size = SZ_128K,
+		.busy_mask = 0x7FFFFFFE,
+	},
+	{
+		.gpurev = ADRENO_REV_A304,
+		.core = 3,
+		.major = 0,
+		.minor = 4,
+		.patchid = 0x00,
+		.pm4fw_name = "a300_pm4.fw",
+		.pfpfw_name = "a300_pfp.fw",
+		.gpudev = &adreno_a3xx_gpudev,
+		.gmem_size = (SZ_64K + SZ_32K),
+		.busy_mask = 0x7FFFFFFE,
+	},
+	{
+		.gpurev = ADRENO_REV_A405,
+		.core = 4,
+		.major = 0,
+		.minor = 5,
+		.patchid = ANY_ID,
+		.features = 0,
+		.pm4fw_name = "a420_pm4.fw",
+		.pfpfw_name = "a420_pfp.fw",
+		.gpudev = &adreno_a4xx_gpudev,
+		.gmem_size = SZ_256K,
+		.busy_mask = 0x7FFFFFFE,
+	},
+	{
+		.gpurev = ADRENO_REV_A420,
+		.core = 4,
+		.major = 2,
+		.minor = 0,
+		.patchid = ANY_ID,
+		.features = ADRENO_USES_OCMEM | ADRENO_WARM_START |
+					ADRENO_USE_BOOTSTRAP,
+		.pm4fw_name = "a420_pm4.fw",
+		.pfpfw_name = "a420_pfp.fw",
+		.gpudev = &adreno_a4xx_gpudev,
+		.gmem_size = (SZ_1M + SZ_512K),
+		.pm4_jt_idx = 0x901,
+		.pm4_jt_addr = 0x300,
+		.pfp_jt_idx = 0x401,
+		.pfp_jt_addr = 0x400,
+		.pm4_bstrp_size = 0x06,
+		.pfp_bstrp_size = 0x28,
+		.pfp_bstrp_ver = 0x4ff083,
+		.busy_mask = 0x7FFFFFFE,
+	},
+	{
+		.gpurev = ADRENO_REV_A430,
+		.core = 4,
+		.major = 3,
+		.minor = 0,
+		.patchid = ANY_ID,
+		.features = ADRENO_USES_OCMEM  | ADRENO_WARM_START |
+			ADRENO_USE_BOOTSTRAP | ADRENO_SPTP_PC | ADRENO_PPD |
+			ADRENO_CONTENT_PROTECTION | ADRENO_PREEMPTION,
+		.pm4fw_name = "a420_pm4.fw",
+		.pfpfw_name = "a420_pfp.fw",
+		.gpudev = &adreno_a4xx_gpudev,
+		.gmem_size = (SZ_1M + SZ_512K),
+		.pm4_jt_idx = 0x901,
+		.pm4_jt_addr = 0x300,
+		.pfp_jt_idx = 0x401,
+		.pfp_jt_addr = 0x400,
+		.pm4_bstrp_size = 0x06,
+		.pfp_bstrp_size = 0x28,
+		.pfp_bstrp_ver = 0x4ff083,
+		.shader_offset = 0x20000,
+		.shader_size = 0x10000,
+		.num_protected_regs = 0x18,
+		.busy_mask = 0x7FFFFFFE,
+	},
+	{
+		.gpurev = ADRENO_REV_A418,
+		.core = 4,
+		.major = 1,
+		.minor = 8,
+		.patchid = ANY_ID,
+		.features = ADRENO_USES_OCMEM  | ADRENO_WARM_START |
+			ADRENO_USE_BOOTSTRAP | ADRENO_SPTP_PC,
+		.pm4fw_name = "a420_pm4.fw",
+		.pfpfw_name = "a420_pfp.fw",
+		.gpudev = &adreno_a4xx_gpudev,
+		.gmem_size = (SZ_512K),
+		.pm4_jt_idx = 0x901,
+		.pm4_jt_addr = 0x300,
+		.pfp_jt_idx = 0x401,
+		.pfp_jt_addr = 0x400,
+		.pm4_bstrp_size = 0x06,
+		.pfp_bstrp_size = 0x28,
+		.pfp_bstrp_ver = 0x4ff083,
+		.shader_offset = 0x20000, /* SP and TP addresses */
+		.shader_size = 0x10000,
+		.num_protected_regs = 0x18,
+		.busy_mask = 0x7FFFFFFE,
+	},
+	{
+		.gpurev = ADRENO_REV_A530,
+		.core = 5,
+		.major = 3,
+		.minor = 0,
+		.patchid = 0,
+		.pm4fw_name = "a530v1_pm4.fw",
+		.pfpfw_name = "a530v1_pfp.fw",
+		.gpudev = &adreno_a5xx_gpudev,
+		.gmem_size = SZ_1M,
+		.num_protected_regs = 0x20,
+		.busy_mask = 0xFFFFFFFE,
+	},
+	{
+		.gpurev = ADRENO_REV_A530,
+		.core = 5,
+		.major = 3,
+		.minor = 0,
+		.patchid = 1,
+		.features = ADRENO_GPMU | ADRENO_SPTP_PC | ADRENO_LM |
+			ADRENO_PREEMPTION | ADRENO_64BIT |
+			ADRENO_CONTENT_PROTECTION,
+		.pm4fw_name = "a530_pm4.fw",
+		.pfpfw_name = "a530_pfp.fw",
+		.zap_name = "a530_zap",
+		.gpudev = &adreno_a5xx_gpudev,
+		.gmem_size = SZ_1M,
+		.num_protected_regs = 0x20,
+		.gpmufw_name = "a530_gpmu.fw2",
+		.gpmu_major = 1,
+		.gpmu_minor = 0,
+		.busy_mask = 0xFFFFFFFE,
+		.lm_major = 3,
+		.lm_minor = 0,
+		.gpmu_tsens = 0x00060007,
+		.max_power = 5448,
+		.regfw_name = "a530v2_seq.fw2",
+	},
+	{
+		.gpurev = ADRENO_REV_A530,
+		.core = 5,
+		.major = 3,
+		.minor = 0,
+		.patchid = ANY_ID,
+		.features = ADRENO_GPMU | ADRENO_SPTP_PC | ADRENO_LM |
+			ADRENO_PREEMPTION | ADRENO_64BIT |
+			ADRENO_CONTENT_PROTECTION,
+		.pm4fw_name = "a530_pm4.fw",
+		.pfpfw_name = "a530_pfp.fw",
+		.zap_name = "a530_zap",
+		.gpudev = &adreno_a5xx_gpudev,
+		.gmem_size = SZ_1M,
+		.num_protected_regs = 0x20,
+		.gpmufw_name = "a530v3_gpmu.fw2",
+		.gpmu_major = 1,
+		.gpmu_minor = 0,
+		.busy_mask = 0xFFFFFFFE,
+		.lm_major = 1,
+		.lm_minor = 0,
+		.gpmu_tsens = 0x00060007,
+		.max_power = 5448,
+		.regfw_name = "a530v3_seq.fw2",
+	},
+	{
+		.gpurev = ADRENO_REV_A505,
+		.core = 5,
+		.major = 0,
+		.minor = 5,
+		.patchid = ANY_ID,
+		.features = ADRENO_PREEMPTION | ADRENO_64BIT,
+		.pm4fw_name = "a530_pm4.fw",
+		.pfpfw_name = "a530_pfp.fw",
+		.gpudev = &adreno_a5xx_gpudev,
+		.gmem_size = (SZ_128K + SZ_8K),
+		.num_protected_regs = 0x20,
+		.busy_mask = 0xFFFFFFFE,
+	},
+	{
+		.gpurev = ADRENO_REV_A506,
+		.core = 5,
+		.major = 0,
+		.minor = 6,
+		.patchid = ANY_ID,
+		.features = ADRENO_PREEMPTION | ADRENO_64BIT |
+			ADRENO_CONTENT_PROTECTION | ADRENO_CPZ_RETENTION,
+		.pm4fw_name = "a530_pm4.fw",
+		.pfpfw_name = "a530_pfp.fw",
+		.zap_name = "a506_zap",
+		.gpudev = &adreno_a5xx_gpudev,
+		.gmem_size = (SZ_128K + SZ_8K),
+		.num_protected_regs = 0x20,
+		.busy_mask = 0xFFFFFFFE,
+	},
+	{
+		.gpurev = ADRENO_REV_A510,
+		.core = 5,
+		.major = 1,
+		.minor = 0,
+		.patchid = ANY_ID,
+		.pm4fw_name = "a530_pm4.fw",
+		.pfpfw_name = "a530_pfp.fw",
+		.gpudev = &adreno_a5xx_gpudev,
+		.gmem_size = SZ_256K,
+		.num_protected_regs = 0x20,
+		.busy_mask = 0xFFFFFFFE,
+	},
+	{
+		.gpurev = ADRENO_REV_A540,
+		.core = 5,
+		.major = 4,
+		.minor = 0,
+		.patchid = 0,
+		.features = ADRENO_PREEMPTION | ADRENO_64BIT |
+			ADRENO_CONTENT_PROTECTION |
+			ADRENO_GPMU | ADRENO_SPTP_PC,
+		.pm4fw_name = "a530_pm4.fw",
+		.pfpfw_name = "a530_pfp.fw",
+		.zap_name = "a540_zap",
+		.gpudev = &adreno_a5xx_gpudev,
+		.gmem_size = SZ_1M,
+		.num_protected_regs = 0x20,
+		.busy_mask = 0xFFFFFFFE,
+		.gpmufw_name = "a540_gpmu.fw2",
+		.gpmu_major = 3,
+		.gpmu_minor = 0,
+		.gpmu_tsens = 0x000C000D,
+		.max_power = 5448,
+	},
+	{
+		.gpurev = ADRENO_REV_A540,
+		.core = 5,
+		.major = 4,
+		.minor = 0,
+		.patchid = ANY_ID,
+		.features = ADRENO_PREEMPTION | ADRENO_64BIT |
+			ADRENO_CONTENT_PROTECTION |
+			ADRENO_GPMU | ADRENO_SPTP_PC,
+		.pm4fw_name = "a530_pm4.fw",
+		.pfpfw_name = "a530_pfp.fw",
+		.zap_name = "a540_zap",
+		.gpudev = &adreno_a5xx_gpudev,
+		.gmem_size = SZ_1M,
+		.num_protected_regs = 0x20,
+		.busy_mask = 0xFFFFFFFE,
+		.gpmufw_name = "a540_gpmu.fw2",
+		.gpmu_major = 3,
+		.gpmu_minor = 0,
+		.gpmu_tsens = 0x000C000D,
+		.max_power = 5448,
+	},
+	{
+		.gpurev = ADRENO_REV_A512,
+		.core = 5,
+		.major = 1,
+		.minor = 2,
+		.patchid = ANY_ID,
+		.features = ADRENO_64BIT,
+		.pm4fw_name = "a530_pm4.fw",
+		.pfpfw_name = "a530_pfp.fw",
+		.gpudev = &adreno_a5xx_gpudev,
+		.gmem_size = (SZ_256K + SZ_16K),
+		.num_protected_regs = 0x20,
+		.busy_mask = 0xFFFFFFFE,
+	},
+};
diff --git a/drivers/gpu/msm/adreno.c b/drivers/gpu/msm/adreno.c
new file mode 100644
index 0000000..d076d06
--- /dev/null
+++ b/drivers/gpu/msm/adreno.c
@@ -0,0 +1,2841 @@
+/* Copyright (c) 2002,2007-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/module.h>
+#include <linux/uaccess.h>
+#include <linux/sched.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/delay.h>
+#include <linux/input.h>
+#include <soc/qcom/scm.h>
+
+#include <linux/msm-bus-board.h>
+#include <linux/msm-bus.h>
+
+#include "kgsl.h"
+#include "kgsl_pwrscale.h"
+#include "kgsl_sharedmem.h"
+#include "kgsl_iommu.h"
+#include "kgsl_trace.h"
+
+#include "adreno.h"
+#include "adreno_iommu.h"
+#include "adreno_compat.h"
+#include "adreno_pm4types.h"
+#include "adreno_trace.h"
+
+#include "a3xx_reg.h"
+#include "adreno_snapshot.h"
+
+/* Include the master list of GPU cores that are supported */
+#include "adreno-gpulist.h"
+#include "adreno_dispatch.h"
+
+#undef MODULE_PARAM_PREFIX
+#define MODULE_PARAM_PREFIX "adreno."
+
+static bool nopreempt;
+module_param(nopreempt, bool, 0444);
+MODULE_PARM_DESC(nopreempt, "Disable GPU preemption");
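+
+/*
+ * With the "adreno." MODULE_PARAM_PREFIX above, a built-in driver takes this
+ * as adreno.nopreempt=1 on the kernel command line (or nopreempt=1 as a
+ * module option); permissions of 0444 make it read-only at runtime.
+ */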
+
+#define DRIVER_VERSION_MAJOR   3
+#define DRIVER_VERSION_MINOR   1
+
+/* Number of times to try hard reset */
+#define NUM_TIMES_RESET_RETRY 5
+
+#define KGSL_LOG_LEVEL_DEFAULT 3
+
+static void adreno_input_work(struct work_struct *work);
+static unsigned int counter_delta(struct kgsl_device *device,
+	unsigned int reg, unsigned int *counter);
+
+static struct devfreq_msm_adreno_tz_data adreno_tz_data = {
+	.bus = {
+		.max = 350,
+	},
+	.device_id = KGSL_DEVICE_3D0,
+};
+
+static const struct kgsl_functable adreno_functable;
+
+static struct adreno_device device_3d0 = {
+	.dev = {
+		KGSL_DEVICE_COMMON_INIT(device_3d0.dev),
+		.pwrscale = KGSL_PWRSCALE_INIT(&adreno_tz_data),
+		.name = DEVICE_3D0_NAME,
+		.id = KGSL_DEVICE_3D0,
+		.pwrctrl = {
+			.irq_name = "kgsl_3d0_irq",
+		},
+		.iomemname = "kgsl_3d0_reg_memory",
+		.shadermemname = "kgsl_3d0_shader_memory",
+		.ftbl = &adreno_functable,
+		.cmd_log = KGSL_LOG_LEVEL_DEFAULT,
+		.ctxt_log = KGSL_LOG_LEVEL_DEFAULT,
+		.drv_log = KGSL_LOG_LEVEL_DEFAULT,
+		.mem_log = KGSL_LOG_LEVEL_DEFAULT,
+		.pwr_log = KGSL_LOG_LEVEL_DEFAULT,
+	},
+	.gmem_size = SZ_256K,
+	.pfp_fw = NULL,
+	.pm4_fw = NULL,
+	.ft_policy = KGSL_FT_DEFAULT_POLICY,
+	.ft_pf_policy = KGSL_FT_PAGEFAULT_DEFAULT_POLICY,
+	.fast_hang_detect = 1,
+	.long_ib_detect = 1,
+	.input_work = __WORK_INITIALIZER(device_3d0.input_work,
+		adreno_input_work),
+	.pwrctrl_flag = BIT(ADRENO_SPTP_PC_CTRL) | BIT(ADRENO_PPD_CTRL) |
+		BIT(ADRENO_LM_CTRL) | BIT(ADRENO_HWCG_CTRL) |
+		BIT(ADRENO_THROTTLING_CTRL),
+	.profile.enabled = false,
+	.active_list = LIST_HEAD_INIT(device_3d0.active_list),
+	.active_list_lock = __SPIN_LOCK_UNLOCKED(device_3d0.active_list_lock),
+};
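+
+/*
+ * device_3d0 is the single statically initialized Adreno device instance;
+ * chip-specific fields (such as gmem_size) are presumably overridden once
+ * the core is identified from adreno-gpulist.h at probe time.
+ */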
+
+/* Ptr to array for the current set of fault detect registers */
+unsigned int *adreno_ft_regs;
+/* Total number of fault detect registers */
+unsigned int adreno_ft_regs_num;
+/* Ptr to array for the current fault detect registers values */
+unsigned int *adreno_ft_regs_val;
+/* Array of default fault detect registers */
+static unsigned int adreno_ft_regs_default[] = {
+	ADRENO_REG_RBBM_STATUS,
+	ADRENO_REG_CP_RB_RPTR,
+	ADRENO_REG_CP_IB1_BASE,
+	ADRENO_REG_CP_IB1_BUFSZ,
+	ADRENO_REG_CP_IB2_BASE,
+	ADRENO_REG_CP_IB2_BUFSZ
+};
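+
+/*
+ * Fault detection is expected to work by periodically sampling the registers
+ * above into adreno_ft_regs_val; if none of the sampled values change while
+ * work is still pending, the GPU is likely hung and recovery can begin.
+ */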
+
+/* Nice level for the higher priority GPU start thread */
+int adreno_wake_nice = -7;
+
+/* Number of milliseconds to stay active after a wake on touch */
+unsigned int adreno_wake_timeout = 100;
+
+/**
+ * adreno_readreg64() - Read a 64bit register by getting its offset from the
+ * offset array defined in gpudev node
+ * @adreno_dev:		Pointer to the adreno device
+ * @lo:	Lower 32-bit register enum to be read
+ * @hi:	Upper 32-bit register enum to be read
+ * @val: Location where the 64-bit register value read is placed
+ */
+void adreno_readreg64(struct adreno_device *adreno_dev,
+		enum adreno_regs lo, enum adreno_regs hi, uint64_t *val)
+{
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	unsigned int val_lo = 0, val_hi = 0;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (adreno_checkreg_off(adreno_dev, lo))
+		kgsl_regread(device, gpudev->reg_offsets->offsets[lo], &val_lo);
+	if (adreno_checkreg_off(adreno_dev, hi))
+		kgsl_regread(device, gpudev->reg_offsets->offsets[hi], &val_hi);
+
+	*val = (val_lo | ((uint64_t)val_hi << 32));
+}
+
+/**
+ * adreno_writereg64() - Write a 64bit register by getting its offset from the
+ * offset array defined in gpudev node
+ * @adreno_dev:	Pointer to the adreno device
+ * @lo:	lower 32bit register enum that is to be written
+ * @hi:	higher 32bit register enum that is to be written
+ * @val: 64 bit value to write
+ */
+void adreno_writereg64(struct adreno_device *adreno_dev,
+		enum adreno_regs lo, enum adreno_regs hi, uint64_t val)
+{
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+
+	if (adreno_checkreg_off(adreno_dev, lo))
+		kgsl_regwrite(KGSL_DEVICE(adreno_dev),
+			gpudev->reg_offsets->offsets[lo], lower_32_bits(val));
+	if (adreno_checkreg_off(adreno_dev, hi))
+		kgsl_regwrite(KGSL_DEVICE(adreno_dev),
+			gpudev->reg_offsets->offsets[hi], upper_32_bits(val));
+}
+
+/**
+ * adreno_get_rptr() - Get the current ringbuffer read pointer
+ * @rb: Pointer to the ringbuffer to query
+ *
+ * Get the latest rptr
+ */
+unsigned int adreno_get_rptr(struct adreno_ringbuffer *rb)
+{
+	struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
+	unsigned int rptr = 0;
+
+	if (adreno_is_a3xx(adreno_dev))
+		adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_RPTR,
+				&rptr);
+	else {
+		struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+		kgsl_sharedmem_readl(&device->scratch, &rptr,
+				SCRATCH_RPTR_OFFSET(rb->id));
+	}
+
+	return rptr;
+}
+
+/**
+ * adreno_of_read_property() - Read a u32 device tree property
+ * @node: Device node to read from
+ * @prop: Name of the property to read
+ * @ptr: Pointer where the value read is stored
+ *
+ * Read a u32 property and log an error if it is missing.
+ */
+static inline int adreno_of_read_property(struct device_node *node,
+	const char *prop, unsigned int *ptr)
+{
+	int ret = of_property_read_u32(node, prop, ptr);
+
+	if (ret)
+		KGSL_CORE_ERR("Unable to read '%s'\n", prop);
+	return ret;
+}
+
+static void __iomem *efuse_base;
+static size_t efuse_len;
+
+int adreno_efuse_map(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct resource *res;
+
+	if (efuse_base != NULL)
+		return 0;
+
+	res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM,
+		"qfprom_memory");
+
+	if (res == NULL)
+		return -ENODEV;
+
+	efuse_base = ioremap(res->start, resource_size(res));
+	if (efuse_base == NULL)
+		return -ENODEV;
+
+	efuse_len = resource_size(res);
+	return 0;
+}
+
+void adreno_efuse_unmap(struct adreno_device *adreno_dev)
+{
+	if (efuse_base != NULL) {
+		iounmap(efuse_base);
+		efuse_base = NULL;
+		efuse_len = 0;
+	}
+}
+
+int adreno_efuse_read_u32(struct adreno_device *adreno_dev, unsigned int offset,
+		unsigned int *val)
+{
+	if (efuse_base == NULL)
+		return -ENODEV;
+
+	if (offset >= efuse_len)
+		return -ERANGE;
+
+	if (val != NULL) {
+		*val = readl_relaxed(efuse_base + offset);
+		/* Make sure memory is updated before returning */
+		rmb();
+	}
+
+	return 0;
+}
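+
+/*
+ * Usage sketch (illustrative only, not called here): target specific code is
+ * expected to use the eFuse helpers above roughly as follows, for example to
+ * read a fuse word that selects a speed bin:
+ *
+ *	unsigned int val;
+ *
+ *	if (adreno_efuse_map(adreno_dev) == 0) {
+ *		if (adreno_efuse_read_u32(adreno_dev, offset, &val) == 0)
+ *			;	 use val, e.g. to pick adreno_dev->speed_bin
+ *		adreno_efuse_unmap(adreno_dev);
+ *	}
+ */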
+
+static int _get_counter(struct adreno_device *adreno_dev,
+		int group, int countable, unsigned int *lo,
+		unsigned int *hi)
+{
+	int ret = 0;
+
+	if (*lo == 0) {
+
+		ret = adreno_perfcounter_get(adreno_dev, group, countable,
+			lo, hi, PERFCOUNTER_FLAG_KERNEL);
+
+		if (ret) {
+			KGSL_DRV_ERR(KGSL_DEVICE(adreno_dev),
+				"Unable to allocate fault detect performance counter %d/%d\n",
+				group, countable);
+			KGSL_DRV_ERR(KGSL_DEVICE(adreno_dev),
+				"GPU fault detect will be less reliable\n");
+		}
+	}
+
+	return ret;
+}
+
+static inline void _put_counter(struct adreno_device *adreno_dev,
+		int group, int countable, unsigned int *lo,
+		unsigned int *hi)
+{
+	if (*lo != 0)
+		adreno_perfcounter_put(adreno_dev, group, countable,
+			PERFCOUNTER_FLAG_KERNEL);
+
+	*lo = 0;
+	*hi = 0;
+}
+
+/**
+ * adreno_fault_detect_start() - Allocate performance counters
+ * used for fast fault detection
+ * @adreno_dev: Pointer to an adreno_device structure
+ *
+ * Allocate the series of performance counters that should be periodically
+ * checked to verify that the GPU is still moving
+ */
+void adreno_fault_detect_start(struct adreno_device *adreno_dev)
+{
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	unsigned int i, j = ARRAY_SIZE(adreno_ft_regs_default);
+
+	if (!test_bit(ADRENO_DEVICE_SOFT_FAULT_DETECT, &adreno_dev->priv))
+		return;
+
+	if (adreno_dev->fast_hang_detect == 1)
+		return;
+
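+	/*
+	 * adreno_ft_regs[] holds the default register offsets in the first
+	 * ARRAY_SIZE(adreno_ft_regs_default) slots; each target specific
+	 * fault detect performance counter then occupies a lo/hi pair of
+	 * slots starting at index j.
+	 */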
+	for (i = 0; i < gpudev->ft_perf_counters_count; i++) {
+		_get_counter(adreno_dev, gpudev->ft_perf_counters[i].counter,
+			 gpudev->ft_perf_counters[i].countable,
+			 &adreno_ft_regs[j + (i * 2)],
+			 &adreno_ft_regs[j + ((i * 2) + 1)]);
+	}
+
+	adreno_dev->fast_hang_detect = 1;
+}
+
+/**
+ * adreno_fault_detect_stop() - Release performance counters
+ * used for fast fault detection
+ * @adreno_dev: Pointer to an adreno_device structure
+ *
+ * Release the counters allocated in adreno_fault_detect_start
+ */
+void adreno_fault_detect_stop(struct adreno_device *adreno_dev)
+{
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	unsigned int i, j = ARRAY_SIZE(adreno_ft_regs_default);
+
+	if (!test_bit(ADRENO_DEVICE_SOFT_FAULT_DETECT, &adreno_dev->priv))
+		return;
+
+	if (!adreno_dev->fast_hang_detect)
+		return;
+
+	for (i = 0; i < gpudev->ft_perf_counters_count; i++) {
+		_put_counter(adreno_dev, gpudev->ft_perf_counters[i].counter,
+			 gpudev->ft_perf_counters[i].countable,
+			 &adreno_ft_regs[j + (i * 2)],
+			 &adreno_ft_regs[j + ((i * 2) + 1)]);
+
+	}
+
+	adreno_dev->fast_hang_detect = 0;
+}
+
+/*
+ * A workqueue callback responsible for actually turning on the GPU after a
+ * touch event. kgsl_pwrctrl_change_state(ACTIVE) is used without any
+ * active_count protection to avoid the need to maintain state.  Either
+ * somebody will start using the GPU or the idle timer will fire and put the
+ * GPU back into slumber.
+ */
+static void adreno_input_work(struct work_struct *work)
+{
+	struct adreno_device *adreno_dev = container_of(work,
+			struct adreno_device, input_work);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	mutex_lock(&device->mutex);
+
+	device->flags |= KGSL_FLAG_WAKE_ON_TOUCH;
+
+	/*
+	 * Don't schedule adreno_start in a high priority workqueue, we are
+	 * already in a workqueue which should be sufficient
+	 */
+	kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE);
+
+	/*
+	 * When waking up from a touch event we want to stay active long enough
+	 * for the user to send a draw command.  The default idle timer timeout
+	 * is shorter than we want so go ahead and push the idle timer out
+	 * further for this special case
+	 */
+	mod_timer(&device->idle_timer,
+		jiffies + msecs_to_jiffies(adreno_wake_timeout));
+	mutex_unlock(&device->mutex);
+}
+
+/*
+ * Process input events and schedule work if needed.  At this point we are only
+ * interested in EV_ABS touchscreen events
+ */
+static void adreno_input_event(struct input_handle *handle, unsigned int type,
+		unsigned int code, int value)
+{
+	struct kgsl_device *device = handle->handler->private;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	/* Only consider EV_ABS (touch) events */
+	if (type != EV_ABS)
+		return;
+
+	/*
+	 * Don't do anything if nothing has been rendered since the last time
+	 * we were here
+	 */
+
+	if (device->flags & KGSL_FLAG_WAKE_ON_TOUCH)
+		return;
+
+	/*
+	 * If the device is in nap, kick the idle timer to make sure that we
+	 * don't go into slumber before the first render. If the device is
+	 * already in slumber schedule the wake.
+	 */
+
+	if (device->state == KGSL_STATE_NAP) {
+		/*
+		 * Set the wake on touch bit to keep from coming back here and
+		 * keeping the device in nap without rendering
+		 */
+
+		device->flags |= KGSL_FLAG_WAKE_ON_TOUCH;
+
+		mod_timer(&device->idle_timer,
+			jiffies + device->pwrctrl.interval_timeout);
+	} else if (device->state == KGSL_STATE_SLUMBER) {
+		schedule_work(&adreno_dev->input_work);
+	}
+}
+
+#ifdef CONFIG_INPUT
+static int adreno_input_connect(struct input_handler *handler,
+		struct input_dev *dev, const struct input_device_id *id)
+{
+	struct input_handle *handle;
+	int ret;
+
+	handle = kzalloc(sizeof(*handle), GFP_KERNEL);
+	if (handle == NULL)
+		return -ENOMEM;
+
+	handle->dev = dev;
+	handle->handler = handler;
+	handle->name = handler->name;
+
+	ret = input_register_handle(handle);
+	if (ret) {
+		kfree(handle);
+		return ret;
+	}
+
+	ret = input_open_device(handle);
+	if (ret) {
+		input_unregister_handle(handle);
+		kfree(handle);
+	}
+
+	return ret;
+}
+
+static void adreno_input_disconnect(struct input_handle *handle)
+{
+	input_close_device(handle);
+	input_unregister_handle(handle);
+	kfree(handle);
+}
+#else
+static int adreno_input_connect(struct input_handler *handler,
+		struct input_dev *dev, const struct input_device_id *id)
+{
+	return 0;
+}
+static void adreno_input_disconnect(struct input_handle *handle) {}
+#endif
+
+/*
+ * We are only interested in EV_ABS events so only register handlers for those
+ * input devices that have EV_ABS events
+ */
+static const struct input_device_id adreno_input_ids[] = {
+	{
+		.flags = INPUT_DEVICE_ID_MATCH_EVBIT,
+		.evbit = { BIT_MASK(EV_ABS) },
+		/* assumption: MT_.._X & MT_.._Y are in the same long */
+		.absbit = { [BIT_WORD(ABS_MT_POSITION_X)] =
+				BIT_MASK(ABS_MT_POSITION_X) |
+				BIT_MASK(ABS_MT_POSITION_Y) },
+	},
+	{ },
+};
+
+static struct input_handler adreno_input_handler = {
+	.event = adreno_input_event,
+	.connect = adreno_input_connect,
+	.disconnect = adreno_input_disconnect,
+	.name = "kgsl",
+	.id_table = adreno_input_ids,
+};
+
+static int adreno_soft_reset(struct kgsl_device *device);
+
+/*
+ * _soft_reset() - Soft reset GPU
+ * @adreno_dev: Pointer to adreno device
+ *
+ * Soft reset the GPU by doing an AHB write of value 1 to RBBM_SW_RESET
+ * register. This is used when we want to reset the GPU without
+ * turning off GFX power rail. The reset when asserted resets
+ * all the HW logic, restores GPU registers to default state and
+ * flushes out pending VBIF transactions.
+ */
+static void _soft_reset(struct adreno_device *adreno_dev)
+{
+	struct adreno_gpudev *gpudev  = ADRENO_GPU_DEVICE(adreno_dev);
+	unsigned int reg;
+
+	/*
+	 * On a530 v1 RBBM cannot be reset in soft reset.
+	 * Reset all blocks except RBBM for a530v1.
+	 */
+	if (adreno_is_a530v1(adreno_dev)) {
+		adreno_writereg(adreno_dev, ADRENO_REG_RBBM_BLOCK_SW_RESET_CMD,
+						 0xFFDFFC0);
+		adreno_writereg(adreno_dev, ADRENO_REG_RBBM_BLOCK_SW_RESET_CMD2,
+						0x1FFFFFFF);
+	} else {
+
+		adreno_writereg(adreno_dev, ADRENO_REG_RBBM_SW_RESET_CMD, 1);
+		/*
+		 * Do a dummy read to get a brief read cycle delay for the
+		 * reset to take effect
+		 */
+		adreno_readreg(adreno_dev, ADRENO_REG_RBBM_SW_RESET_CMD, &reg);
+		adreno_writereg(adreno_dev, ADRENO_REG_RBBM_SW_RESET_CMD, 0);
+	}
+
+	/* The SP/TP regulator gets turned off after a soft reset */
+
+	if (gpudev->regulator_enable)
+		gpudev->regulator_enable(adreno_dev);
+}
+
+
+void adreno_irqctrl(struct adreno_device *adreno_dev, int state)
+{
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	unsigned int mask = state ? gpudev->irq->mask : 0;
+
+	adreno_writereg(adreno_dev, ADRENO_REG_RBBM_INT_0_MASK, mask);
+}
+
+/*
+ * adreno_hang_int_callback() - Isr for fatal interrupts that hang GPU
+ * @adreno_dev: Pointer to device
+ * @bit: Interrupt bit
+ */
+void adreno_hang_int_callback(struct adreno_device *adreno_dev, int bit)
+{
+	KGSL_DRV_CRIT_RATELIMIT(KGSL_DEVICE(adreno_dev),
+			"MISC: GPU hang detected\n");
+	adreno_irqctrl(adreno_dev, 0);
+
+	/* Trigger a fault in the dispatcher - this will effect a restart */
+	adreno_set_gpu_fault(adreno_dev, ADRENO_HARD_FAULT);
+	adreno_dispatcher_schedule(KGSL_DEVICE(adreno_dev));
+}
+
+/*
+ * adreno_cp_callback() - CP interrupt handler
+ * @adreno_dev: Adreno device pointer
+ * @bit: Interrupt bit that fired
+ *
+ * Handle the CP interrupt generated by the GPU.
+ */
+void adreno_cp_callback(struct adreno_device *adreno_dev, int bit)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	adreno_dispatcher_schedule(device);
+}
+
+static irqreturn_t adreno_irq_handler(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	struct adreno_irq *irq_params = gpudev->irq;
+	irqreturn_t ret = IRQ_NONE;
+	unsigned int status = 0, tmp, int_bit;
+	int i;
+
+	adreno_readreg(adreno_dev, ADRENO_REG_RBBM_INT_0_STATUS, &status);
+
+	/*
+	 * Clear all the interrupt bits except ADRENO_INT_RBBM_AHB_ERROR: even
+	 * if we cleared it here it would stay high until it is cleared in its
+	 * respective handler, and the interrupt would keep firing.
+	 */
+	int_bit = ADRENO_INT_BIT(adreno_dev, ADRENO_INT_RBBM_AHB_ERROR);
+	adreno_writereg(adreno_dev, ADRENO_REG_RBBM_INT_CLEAR_CMD,
+				status & ~int_bit);
+
+	/* Loop through all set interrupts and call respective handlers */
+	for (tmp = status; tmp != 0;) {
+		i = fls(tmp) - 1;
+
+		if (irq_params->funcs[i].func != NULL) {
+			if (irq_params->mask & BIT(i))
+				irq_params->funcs[i].func(adreno_dev, i);
+		} else
+			KGSL_DRV_CRIT_RATELIMIT(device,
+					"Unhandled interrupt bit %x\n", i);
+
+		ret = IRQ_HANDLED;
+
+		tmp &= ~BIT(i);
+	}
+
+	gpudev->irq_trace(adreno_dev, status);
+
+	/*
+	 * Clear ADRENO_INT_RBBM_AHB_ERROR bit after this interrupt has been
+	 * cleared in its respective handler
+	 */
+	if (status & int_bit)
+		adreno_writereg(adreno_dev, ADRENO_REG_RBBM_INT_CLEAR_CMD,
+				int_bit);
+
+	return ret;
+}
+
+static inline bool _rev_match(unsigned int id, unsigned int entry)
+{
+	return (entry == ANY_ID || entry == id);
+}
+
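+/*
+ * Look up the GPU core description for a chip ID. The chip ID is decomposed
+ * with the ADRENO_CHIPID_* helpers; a gpulist entry may use ANY_ID for the
+ * major, minor or patch fields to act as a wildcard.
+ */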
+static inline const struct adreno_gpu_core *_get_gpu_core(unsigned int chipid)
+{
+	unsigned int core = ADRENO_CHIPID_CORE(chipid);
+	unsigned int major = ADRENO_CHIPID_MAJOR(chipid);
+	unsigned int minor = ADRENO_CHIPID_MINOR(chipid);
+	unsigned int patchid = ADRENO_CHIPID_PATCH(chipid);
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(adreno_gpulist); i++) {
+		if (core == adreno_gpulist[i].core &&
+		    _rev_match(major, adreno_gpulist[i].major) &&
+		    _rev_match(minor, adreno_gpulist[i].minor) &&
+		    _rev_match(patchid, adreno_gpulist[i].patchid))
+			return &adreno_gpulist[i];
+	}
+
+	return NULL;
+}
+
+static void
+adreno_identify_gpu(struct adreno_device *adreno_dev)
+{
+	const struct adreno_reg_offsets *reg_offsets;
+	struct adreno_gpudev *gpudev;
+	int i;
+
+	if (kgsl_property_read_u32(KGSL_DEVICE(adreno_dev), "qcom,chipid",
+		&adreno_dev->chipid))
+		KGSL_DRV_FATAL(KGSL_DEVICE(adreno_dev),
+			"No GPU chip ID was specified\n");
+
+	adreno_dev->gpucore = _get_gpu_core(adreno_dev->chipid);
+
+	if (adreno_dev->gpucore == NULL)
+		KGSL_DRV_FATAL(KGSL_DEVICE(adreno_dev),
+			"Unknown GPU chip ID %8.8X\n", adreno_dev->chipid);
+
+	/*
+	 * The gmem size might be dynamic when ocmem is involved so copy it out
+	 * of the gpu device
+	 */
+
+	adreno_dev->gmem_size = adreno_dev->gpucore->gmem_size;
+
+	/*
+	 * Initialize uninitialized GPU register offsets; this only needs to be
+	 * done once. Set all offsets that are not initialized to
+	 * ADRENO_REG_UNUSED
+	 */
+
+	gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	reg_offsets = gpudev->reg_offsets;
+
+	for (i = 0; i < ADRENO_REG_REGISTER_MAX; i++) {
+		if (reg_offsets->offset_0 != i && !reg_offsets->offsets[i])
+			reg_offsets->offsets[i] = ADRENO_REG_UNUSED;
+	}
+
+	/* Do target specific identification */
+	if (gpudev->platform_setup != NULL)
+		gpudev->platform_setup(adreno_dev);
+}
+
+static const struct platform_device_id adreno_id_table[] = {
+	{ DEVICE_3D0_NAME, (unsigned long) &device_3d0, },
+	{},
+};
+
+MODULE_DEVICE_TABLE(platform, adreno_id_table);
+
+static const struct of_device_id adreno_match_table[] = {
+	{ .compatible = "qcom,kgsl-3d0", .data = &device_3d0 },
+	{}
+};
+
+static int adreno_of_parse_pwrlevels(struct adreno_device *adreno_dev,
+		struct device_node *node)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct device_node *child;
+
+	pwr->num_pwrlevels = 0;
+
+	for_each_child_of_node(node, child) {
+		unsigned int index;
+		struct kgsl_pwrlevel *level;
+
+		if (adreno_of_read_property(child, "reg", &index))
+			return -EINVAL;
+
+		if (index >= KGSL_MAX_PWRLEVELS) {
+			KGSL_CORE_ERR("Pwrlevel index %d is out of range\n",
+				index);
+			continue;
+		}
+
+		if (index >= pwr->num_pwrlevels)
+			pwr->num_pwrlevels = index + 1;
+
+		level = &pwr->pwrlevels[index];
+
+		if (adreno_of_read_property(child, "qcom,gpu-freq",
+			&level->gpu_freq))
+			return -EINVAL;
+
+		if (adreno_of_read_property(child, "qcom,bus-freq",
+			&level->bus_freq))
+			return -EINVAL;
+
+		if (of_property_read_u32(child, "qcom,bus-min",
+			&level->bus_min))
+			level->bus_min = level->bus_freq;
+
+		if (of_property_read_u32(child, "qcom,bus-max",
+			&level->bus_max))
+			level->bus_max = level->bus_freq;
+	}
+
+	return 0;
+}
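+
+/*
+ * The parser above expects child nodes shaped roughly like the following
+ * hypothetical device tree fragment (the node and property names come from
+ * the code; the child node name is arbitrary and the values are examples):
+ *
+ *	qcom,gpu-pwrlevels {
+ *		qcom,gpu-pwrlevel@0 {
+ *			reg = <0>;
+ *			qcom,gpu-freq = <600000000>;
+ *			qcom,bus-freq = <12>;
+ *			qcom,bus-min = <11>;
+ *			qcom,bus-max = <13>;
+ *		};
+ *	};
+ */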
+
+
+static void adreno_of_get_initial_pwrlevel(struct adreno_device *adreno_dev,
+		struct device_node *node)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	int init_level = 1;
+
+	of_property_read_u32(node, "qcom,initial-pwrlevel", &init_level);
+
+	if (init_level < 0 || init_level > pwr->num_pwrlevels)
+		init_level = 1;
+
+	pwr->active_pwrlevel = init_level;
+	pwr->default_pwrlevel = init_level;
+}
+
+static int adreno_of_get_legacy_pwrlevels(struct adreno_device *adreno_dev,
+		struct device_node *parent)
+{
+	struct device_node *node;
+	int ret;
+
+	node = of_find_node_by_name(parent, "qcom,gpu-pwrlevels");
+
+	if (node == NULL) {
+		KGSL_CORE_ERR("Unable to find 'qcom,gpu-pwrlevels'\n");
+		return -EINVAL;
+	}
+
+	ret = adreno_of_parse_pwrlevels(adreno_dev, node);
+	if (ret == 0)
+		adreno_of_get_initial_pwrlevel(adreno_dev, parent);
+	return ret;
+}
+
+static int adreno_of_get_pwrlevels(struct adreno_device *adreno_dev,
+		struct device_node *parent)
+{
+	struct device_node *node, *child;
+
+	node = of_find_node_by_name(parent, "qcom,gpu-pwrlevel-bins");
+	if (node == NULL)
+		return adreno_of_get_legacy_pwrlevels(adreno_dev, parent);
+
+	for_each_child_of_node(node, child) {
+		unsigned int bin;
+
+		if (of_property_read_u32(child, "qcom,speed-bin", &bin))
+			continue;
+
+		if (bin == adreno_dev->speed_bin) {
+			int ret;
+
+			ret = adreno_of_parse_pwrlevels(adreno_dev, child);
+			if (ret == 0)
+				adreno_of_get_initial_pwrlevel(adreno_dev,
+								child);
+			return ret;
+		}
+	}
+
+	return -ENODEV;
+}
+
+static inline struct adreno_device *adreno_get_dev(struct platform_device *pdev)
+{
+	const struct of_device_id *of_id =
+		of_match_device(adreno_match_table, &pdev->dev);
+
+	return of_id ? (struct adreno_device *) of_id->data : NULL;
+}
+
+static struct {
+	unsigned int quirk;
+	const char *prop;
+} adreno_quirks[] = {
+	 { ADRENO_QUIRK_TWO_PASS_USE_WFI, "qcom,gpu-quirk-two-pass-use-wfi" },
+	 { ADRENO_QUIRK_IOMMU_SYNC, "qcom,gpu-quirk-iommu-sync" },
+	 { ADRENO_QUIRK_CRITICAL_PACKETS, "qcom,gpu-quirk-critical-packets" },
+	 { ADRENO_QUIRK_FAULT_DETECT_MASK, "qcom,gpu-quirk-fault-detect-mask" },
+	 { ADRENO_QUIRK_DISABLE_RB_DP2CLOCKGATING,
+			"qcom,gpu-quirk-dp2clockgating-disable" },
+	 { ADRENO_QUIRK_DISABLE_LMLOADKILL,
+			"qcom,gpu-quirk-lmloadkill-disable" },
+};
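+
+/*
+ * Each quirk is a boolean device tree property on the GPU node; for example,
+ * a hypothetical "qcom,gpu-quirk-iommu-sync;" entry would set
+ * ADRENO_QUIRK_IOMMU_SYNC below.
+ */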
+
+static int adreno_of_get_power(struct adreno_device *adreno_dev,
+		struct platform_device *pdev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct device_node *node = pdev->dev.of_node;
+	int i;
+	unsigned int timeout;
+
+	if (of_property_read_string(node, "label", &pdev->name)) {
+		KGSL_CORE_ERR("Unable to read 'label'\n");
+		return -EINVAL;
+	}
+
+	if (adreno_of_read_property(node, "qcom,id", &pdev->id))
+		return -EINVAL;
+
+	/* Set up quirks and other boolean options */
+	for (i = 0; i < ARRAY_SIZE(adreno_quirks); i++) {
+		if (of_property_read_bool(node, adreno_quirks[i].prop))
+			adreno_dev->quirks |= adreno_quirks[i].quirk;
+	}
+
+	if (adreno_of_get_pwrlevels(adreno_dev, node))
+		return -EINVAL;
+
+	/* get pm-qos-active-latency, set it to default if not found */
+	if (of_property_read_u32(node, "qcom,pm-qos-active-latency",
+		&device->pwrctrl.pm_qos_active_latency))
+		device->pwrctrl.pm_qos_active_latency = 501;
+
+	/* get pm-qos-cpu-mask-latency, set it to default if not found */
+	if (of_property_read_u32(node, "qcom,l2pc-cpu-mask-latency",
+		&device->pwrctrl.pm_qos_cpu_mask_latency))
+		device->pwrctrl.pm_qos_cpu_mask_latency = 501;
+
+	/* get pm-qos-wakeup-latency, set it to default if not found */
+	if (of_property_read_u32(node, "qcom,pm-qos-wakeup-latency",
+		&device->pwrctrl.pm_qos_wakeup_latency))
+		device->pwrctrl.pm_qos_wakeup_latency = 101;
+
+	if (of_property_read_u32(node, "qcom,idle-timeout", &timeout))
+		timeout = 80;
+
+	device->pwrctrl.interval_timeout = msecs_to_jiffies(timeout);
+
+	device->pwrctrl.bus_control = of_property_read_bool(node,
+		"qcom,bus-control");
+
+	return 0;
+}
+
+#ifdef CONFIG_QCOM_OCMEM
+static int
+adreno_ocmem_malloc(struct adreno_device *adreno_dev)
+{
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_USES_OCMEM))
+		return 0;
+
+	if (adreno_dev->ocmem_hdl == NULL) {
+		adreno_dev->ocmem_hdl =
+			ocmem_allocate(OCMEM_GRAPHICS, adreno_dev->gmem_size);
+		if (IS_ERR_OR_NULL(adreno_dev->ocmem_hdl)) {
+			adreno_dev->ocmem_hdl = NULL;
+			return -ENOMEM;
+		}
+
+		adreno_dev->gmem_size = adreno_dev->ocmem_hdl->len;
+		adreno_dev->gmem_base = adreno_dev->ocmem_hdl->addr;
+	}
+
+	return 0;
+}
+
+static void
+adreno_ocmem_free(struct adreno_device *adreno_dev)
+{
+	if (adreno_dev->ocmem_hdl != NULL) {
+		ocmem_free(OCMEM_GRAPHICS, adreno_dev->ocmem_hdl);
+		adreno_dev->ocmem_hdl = NULL;
+	}
+}
+#else
+static int
+adreno_ocmem_malloc(struct adreno_device *adreno_dev)
+{
+	return 0;
+}
+
+static void
+adreno_ocmem_free(struct adreno_device *adreno_dev)
+{
+}
+#endif
+
+static int adreno_probe(struct platform_device *pdev)
+{
+	struct kgsl_device *device;
+	struct adreno_device *adreno_dev;
+	int status;
+
+	adreno_dev = adreno_get_dev(pdev);
+
+	if (adreno_dev == NULL) {
+		pr_err("adreno: qcom,kgsl-3d0 does not exist in the device tree");
+		return -ENODEV;
+	}
+
+	device = KGSL_DEVICE(adreno_dev);
+	device->pdev = pdev;
+
+	/* Get the chip ID from the DT and set up target specific parameters */
+	adreno_identify_gpu(adreno_dev);
+
+	status = adreno_of_get_power(adreno_dev, pdev);
+	if (status) {
+		device->pdev = NULL;
+		return status;
+	}
+
+	/*
+	 * The SMMU APIs use unsigned long for virtual addresses which means
+	 * that we cannot use 64 bit virtual addresses on a 32 bit kernel even
+	 * though the hardware and the rest of the KGSL driver supports it.
+	 */
+	if (adreno_support_64bit(adreno_dev))
+		device->mmu.features |= KGSL_MMU_64BIT;
+
+	status = kgsl_device_platform_probe(device);
+	if (status) {
+		device->pdev = NULL;
+		return status;
+	}
+
+	/*
+	 * qcom,iommu-secure-id is used to identify MMUs that can handle secure
+	 * content but that is only part of the story - the GPU also has to be
+	 * able to handle secure content.  Unfortunately in a classic catch-22
+	 * we cannot identify the GPU until after the DT is parsed. tl;dr -
+	 * check the GPU capabilities here and modify mmu->secured accordingly
+	 */
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_CONTENT_PROTECTION))
+		device->mmu.secured = false;
+
+	status = adreno_ringbuffer_probe(adreno_dev, nopreempt);
+	if (status)
+		goto out;
+
+	status = adreno_dispatcher_init(adreno_dev);
+	if (status)
+		goto out;
+
+	adreno_debugfs_init(adreno_dev);
+	adreno_profile_init(adreno_dev);
+
+	adreno_sysfs_init(adreno_dev);
+
+	kgsl_pwrscale_init(&pdev->dev, CONFIG_QCOM_ADRENO_DEFAULT_GOVERNOR);
+
+	/* Initialize coresight for the target */
+	adreno_coresight_init(adreno_dev);
+
+	adreno_input_handler.private = device;
+
+#ifdef CONFIG_INPUT
+	/*
+	 * It isn't fatal if we cannot register the input handler.  Sad,
+	 * perhaps, but not fatal
+	 */
+	if (input_register_handler(&adreno_input_handler))
+		KGSL_DRV_ERR(device, "Unable to register the input handler\n");
+#endif
+out:
+	if (status) {
+		adreno_ringbuffer_close(adreno_dev);
+		kgsl_device_platform_remove(device);
+		device->pdev = NULL;
+	}
+
+	return status;
+}
+
+static void _adreno_free_memories(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (test_bit(ADRENO_DEVICE_DRAWOBJ_PROFILE, &adreno_dev->priv))
+		kgsl_free_global(device, &adreno_dev->profile_buffer);
+
+	/* Free local copies of firmware and other command streams */
+	kfree(adreno_dev->pfp_fw);
+	adreno_dev->pfp_fw = NULL;
+
+	kfree(adreno_dev->pm4_fw);
+	adreno_dev->pm4_fw = NULL;
+
+	kfree(adreno_dev->gpmu_cmds);
+	adreno_dev->gpmu_cmds = NULL;
+
+	kgsl_free_global(device, &adreno_dev->pm4);
+	kgsl_free_global(device, &adreno_dev->pfp);
+}
+
+static int adreno_remove(struct platform_device *pdev)
+{
+	struct adreno_device *adreno_dev = adreno_get_dev(pdev);
+	struct adreno_gpudev *gpudev;
+	struct kgsl_device *device;
+
+	if (adreno_dev == NULL)
+		return 0;
+
+	device = KGSL_DEVICE(adreno_dev);
+	gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+
+	if (gpudev->remove != NULL)
+		gpudev->remove(adreno_dev);
+
+	/* The memory is fading */
+	_adreno_free_memories(adreno_dev);
+
+#ifdef CONFIG_INPUT
+	input_unregister_handler(&adreno_input_handler);
+#endif
+	adreno_sysfs_close(adreno_dev);
+
+	adreno_coresight_remove(adreno_dev);
+	adreno_profile_close(adreno_dev);
+
+	kgsl_pwrscale_close(device);
+
+	adreno_dispatcher_close(adreno_dev);
+	adreno_ringbuffer_close(adreno_dev);
+
+	adreno_fault_detect_stop(adreno_dev);
+
+	kfree(adreno_ft_regs);
+	adreno_ft_regs = NULL;
+
+	kfree(adreno_ft_regs_val);
+	adreno_ft_regs_val = NULL;
+
+	if (efuse_base != NULL)
+		iounmap(efuse_base);
+
+	adreno_perfcounter_close(adreno_dev);
+	kgsl_device_platform_remove(device);
+
+	if (test_bit(ADRENO_DEVICE_PWRON_FIXUP, &adreno_dev->priv)) {
+		kgsl_free_global(device, &adreno_dev->pwron_fixup);
+		clear_bit(ADRENO_DEVICE_PWRON_FIXUP, &adreno_dev->priv);
+	}
+	clear_bit(ADRENO_DEVICE_INITIALIZED, &adreno_dev->priv);
+
+	return 0;
+}
+
+static void adreno_fault_detect_init(struct adreno_device *adreno_dev)
+{
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	int i, val = adreno_dev->fast_hang_detect;
+
+	/* Disable the fast hang detect bit until we know it's a go */
+	adreno_dev->fast_hang_detect = 0;
+
+	adreno_ft_regs_num = (ARRAY_SIZE(adreno_ft_regs_default) +
+		gpudev->ft_perf_counters_count*2);
+
+	adreno_ft_regs = kcalloc(adreno_ft_regs_num, sizeof(unsigned int),
+		GFP_KERNEL);
+	adreno_ft_regs_val = kcalloc(adreno_ft_regs_num, sizeof(unsigned int),
+		GFP_KERNEL);
+
+	if (adreno_ft_regs == NULL || adreno_ft_regs_val == NULL) {
+		kfree(adreno_ft_regs);
+		kfree(adreno_ft_regs_val);
+
+		adreno_ft_regs = NULL;
+		adreno_ft_regs_val = NULL;
+
+		return;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(adreno_ft_regs_default); i++)
+		adreno_ft_regs[i] = adreno_getreg(adreno_dev,
+			adreno_ft_regs_default[i]);
+
+	set_bit(ADRENO_DEVICE_SOFT_FAULT_DETECT, &adreno_dev->priv);
+
+	if (val)
+		adreno_fault_detect_start(adreno_dev);
+}
+
+static int adreno_init(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	int ret;
+
+	ret = kgsl_pwrctrl_change_state(device, KGSL_STATE_INIT);
+	if (ret)
+		return ret;
+
+	/*
+	 * Initialization only needs to be done once until the device is
+	 * shut down
+	 */
+	if (test_bit(ADRENO_DEVICE_INITIALIZED, &adreno_dev->priv))
+		return 0;
+
+	/*
+	 * If the microcode read fails (either the usermodehelper isn't
+	 * available or the microcode is corrupted), fail the init and force
+	 * the user to try the open() again
+	 */
+
+	ret = gpudev->microcode_read(adreno_dev);
+	if (ret)
+		return ret;
+
+	/* Put the GPU in a responsive state */
+	ret = kgsl_pwrctrl_change_state(device, KGSL_STATE_AWARE);
+	if (ret)
+		return ret;
+
+	ret = adreno_iommu_init(adreno_dev);
+	if (ret)
+		return ret;
+
+	adreno_perfcounter_init(adreno_dev);
+	adreno_fault_detect_init(adreno_dev);
+
+	/* Power down the device */
+	kgsl_pwrctrl_change_state(device, KGSL_STATE_INIT);
+
+	if (gpudev->init != NULL)
+		gpudev->init(adreno_dev);
+
+	set_bit(ADRENO_DEVICE_INITIALIZED, &adreno_dev->priv);
+
+	/* Use shader offset and length defined in gpudev */
+	if (adreno_dev->gpucore->shader_offset &&
+					adreno_dev->gpucore->shader_size) {
+
+		if (device->shader_mem_phys || device->shader_mem_virt)
+			KGSL_DRV_ERR(device,
+			"Shader memory already specified in device tree\n");
+		else {
+			device->shader_mem_phys = device->reg_phys +
+					adreno_dev->gpucore->shader_offset;
+			device->shader_mem_virt = device->reg_virt +
+					adreno_dev->gpucore->shader_offset;
+			device->shader_mem_len =
+					adreno_dev->gpucore->shader_size;
+		}
+	}
+
+	/*
+	 * Allocate a small chunk of memory for precise drawobj profiling for
+	 * those targets that have the always on timer
+	 */
+
+	if (!adreno_is_a3xx(adreno_dev)) {
+		int r = kgsl_allocate_global(device,
+			&adreno_dev->profile_buffer, PAGE_SIZE,
+			0, 0, "alwayson");
+
+		adreno_dev->profile_index = 0;
+
+		if (r == 0) {
+			set_bit(ADRENO_DEVICE_DRAWOBJ_PROFILE,
+				&adreno_dev->priv);
+			kgsl_sharedmem_set(device,
+				&adreno_dev->profile_buffer, 0, 0,
+				PAGE_SIZE);
+		}
+
+	}
+
+	if (nopreempt == false &&
+		ADRENO_FEATURE(adreno_dev, ADRENO_PREEMPTION)) {
+		int r = 0;
+
+		if (gpudev->preemption_init)
+			r = gpudev->preemption_init(adreno_dev);
+
+		if (r == 0)
+			set_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv);
+		else
+			WARN(1, "adreno: GPU preemption is disabled\n");
+	}
+
+	return 0;
+}
+
+static bool regulators_left_on(struct kgsl_device *device)
+{
+	int i;
+
+	for (i = 0; i < KGSL_MAX_REGULATORS; i++) {
+		struct kgsl_regulator *regulator =
+			&device->pwrctrl.regulators[i];
+
+		if (IS_ERR_OR_NULL(regulator->reg))
+			break;
+
+		if (regulator_is_enabled(regulator->reg))
+			return true;
+	}
+
+	return false;
+}
+
+static void _set_secvid(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	/* Program GPU content protection init values */
+	if (device->mmu.secured) {
+		if (adreno_is_a4xx(adreno_dev))
+			adreno_writereg(adreno_dev,
+				ADRENO_REG_RBBM_SECVID_TRUST_CONFIG, 0x2);
+		adreno_writereg(adreno_dev,
+				ADRENO_REG_RBBM_SECVID_TSB_CONTROL, 0x0);
+
+		adreno_writereg64(adreno_dev,
+			ADRENO_REG_RBBM_SECVID_TSB_TRUSTED_BASE,
+			ADRENO_REG_RBBM_SECVID_TSB_TRUSTED_BASE_HI,
+			KGSL_IOMMU_SECURE_BASE);
+		adreno_writereg(adreno_dev,
+			ADRENO_REG_RBBM_SECVID_TSB_TRUSTED_SIZE,
+			KGSL_IOMMU_SECURE_SIZE);
+	}
+}
+
+/**
+ * _adreno_start - Power up the GPU and prepare to accept commands
+ * @adreno_dev: Pointer to an adreno_device structure
+ *
+ * The core function that powers up and initializes the GPU.  This function is
+ * called at init and after coming out of SLUMBER
+ */
+static int _adreno_start(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	int status = -EINVAL, ret;
+	unsigned int state = device->state;
+	bool regulator_left_on;
+	unsigned int pmqos_wakeup_vote = device->pwrctrl.pm_qos_wakeup_latency;
+	unsigned int pmqos_active_vote = device->pwrctrl.pm_qos_active_latency;
+
+	/* make sure ADRENO_DEVICE_STARTED is not set here */
+	BUG_ON(test_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv));
+
+	pm_qos_update_request(&device->pwrctrl.pm_qos_req_dma,
+			pmqos_wakeup_vote);
+
+	regulator_left_on = regulators_left_on(device);
+
+	/* Clear any GPU faults that might have been left over */
+	adreno_clear_gpu_fault(adreno_dev);
+
+	/* Put the GPU in a responsive state */
+	status = kgsl_pwrctrl_change_state(device, KGSL_STATE_AWARE);
+	if (status)
+		goto error_pwr_off;
+
+	/* Set the bit to indicate that we've just powered on */
+	set_bit(ADRENO_DEVICE_PWRON, &adreno_dev->priv);
+
+	/* Soft reset the GPU if a regulator is stuck on */
+	if (regulator_left_on)
+		_soft_reset(adreno_dev);
+
+	adreno_ringbuffer_set_global(adreno_dev, 0);
+
+	status = kgsl_mmu_start(device);
+	if (status)
+		goto error_pwr_off;
+
+	_set_secvid(device);
+
+	status = adreno_ocmem_malloc(adreno_dev);
+	if (status) {
+		KGSL_DRV_ERR(device, "OCMEM malloc failed\n");
+		goto error_mmu_off;
+	}
+
+	/* Enable 64 bit gpu addr if feature is set */
+	if (gpudev->enable_64bit &&
+			adreno_support_64bit(adreno_dev))
+		gpudev->enable_64bit(adreno_dev);
+
+	if (adreno_dev->perfctr_pwr_lo == 0) {
+		ret = adreno_perfcounter_get(adreno_dev,
+			KGSL_PERFCOUNTER_GROUP_PWR, 1,
+			&adreno_dev->perfctr_pwr_lo, NULL,
+			PERFCOUNTER_FLAG_KERNEL);
+
+		if (ret) {
+			KGSL_DRV_ERR(device,
+				"Unable to get the perf counters for DCVS\n");
+			adreno_dev->perfctr_pwr_lo = 0;
+		}
+	}
+
+
+	if (device->pwrctrl.bus_control) {
+		/* VBIF waiting for RAM */
+		if (adreno_dev->starved_ram_lo == 0) {
+			ret = adreno_perfcounter_get(adreno_dev,
+				KGSL_PERFCOUNTER_GROUP_VBIF_PWR, 0,
+				&adreno_dev->starved_ram_lo, NULL,
+				PERFCOUNTER_FLAG_KERNEL);
+
+			if (ret) {
+				KGSL_DRV_ERR(device,
+					"Unable to get perf counters for bus DCVS\n");
+				adreno_dev->starved_ram_lo = 0;
+			}
+		}
+
+		/* VBIF DDR cycles */
+		if (adreno_dev->ram_cycles_lo == 0) {
+			ret = adreno_perfcounter_get(adreno_dev,
+				KGSL_PERFCOUNTER_GROUP_VBIF,
+				VBIF_AXI_TOTAL_BEATS,
+				&adreno_dev->ram_cycles_lo, NULL,
+				PERFCOUNTER_FLAG_KERNEL);
+
+			if (ret) {
+				KGSL_DRV_ERR(device,
+					"Unable to get perf counters for bus DCVS\n");
+				adreno_dev->ram_cycles_lo = 0;
+			}
+		}
+	}
+
+	/* Clear the busy_data stats - we're starting over from scratch */
+	adreno_dev->busy_data.gpu_busy = 0;
+	adreno_dev->busy_data.vbif_ram_cycles = 0;
+	adreno_dev->busy_data.vbif_starved_ram = 0;
+
+	/* Restore performance counter registers with saved values */
+	adreno_perfcounter_restore(adreno_dev);
+
+	/* Start the GPU */
+	gpudev->start(adreno_dev);
+
+	/* Re-initialize the coresight registers if applicable */
+	adreno_coresight_start(adreno_dev);
+
+	adreno_irqctrl(adreno_dev, 1);
+
+	adreno_perfcounter_start(adreno_dev);
+
+	/* Clear FSR here in case it is set from a previous pagefault */
+	kgsl_mmu_clear_fsr(&device->mmu);
+
+	status = adreno_ringbuffer_start(adreno_dev, ADRENO_START_COLD);
+	if (status)
+		goto error_mmu_off;
+
+	/* Start the dispatcher */
+	adreno_dispatcher_start(device);
+
+	device->reset_counter++;
+
+	set_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv);
+
+	if (pmqos_active_vote != pmqos_wakeup_vote)
+		pm_qos_update_request(&device->pwrctrl.pm_qos_req_dma,
+				pmqos_active_vote);
+
+	return 0;
+
+error_mmu_off:
+	kgsl_mmu_stop(&device->mmu);
+
+error_pwr_off:
+	/* set the state back to original state */
+	kgsl_pwrctrl_change_state(device, state);
+
+	if (pmqos_active_vote != pmqos_wakeup_vote)
+		pm_qos_update_request(&device->pwrctrl.pm_qos_req_dma,
+				pmqos_active_vote);
+
+	return status;
+}
+
+/**
+ * adreno_start() - Power up and initialize the GPU
+ * @device: Pointer to the KGSL device to power up
+ * @priority: Boolean flag to specify if the start should run at elevated
+ * thread priority
+ *
+ * Power up the GPU and initialize it.  If priority is specified then elevate
+ * the thread priority for the duration of the start operation
+ */
+static int adreno_start(struct kgsl_device *device, int priority)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	int nice = task_nice(current);
+	int ret;
+
+	if (priority && (adreno_wake_nice < nice))
+		set_user_nice(current, adreno_wake_nice);
+
+	ret = _adreno_start(adreno_dev);
+
+	if (priority)
+		set_user_nice(current, nice);
+
+	return ret;
+}
+
+/**
+ * adreno_vbif_clear_pending_transactions() - Clear transactions in VBIF pipe
+ * @device: Pointer to the device whose VBIF pipe is to be cleared
+ */
+static int adreno_vbif_clear_pending_transactions(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	unsigned int mask = gpudev->vbif_xin_halt_ctrl0_mask;
+	unsigned int val;
+	unsigned long wait_for_vbif;
+	int ret = 0;
+
+	adreno_writereg(adreno_dev, ADRENO_REG_VBIF_XIN_HALT_CTRL0, mask);
+	/* wait for the transactions to clear */
+	wait_for_vbif = jiffies + msecs_to_jiffies(100);
+	while (1) {
+		adreno_readreg(adreno_dev,
+			ADRENO_REG_VBIF_XIN_HALT_CTRL1, &val);
+		if ((val & mask) == mask)
+			break;
+		if (time_after(jiffies, wait_for_vbif)) {
+			KGSL_DRV_ERR(device,
+				"Wait limit reached for VBIF XIN Halt\n");
+			ret = -ETIMEDOUT;
+			break;
+		}
+	}
+	adreno_writereg(adreno_dev, ADRENO_REG_VBIF_XIN_HALT_CTRL0, 0);
+	return ret;
+}
+
+static void adreno_set_active_ctxs_null(struct adreno_device *adreno_dev)
+{
+	int i;
+	struct adreno_ringbuffer *rb;
+
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+		if (rb->drawctxt_active)
+			kgsl_context_put(&(rb->drawctxt_active->base));
+		rb->drawctxt_active = NULL;
+
+		kgsl_sharedmem_writel(KGSL_DEVICE(adreno_dev),
+			&rb->pagetable_desc, PT_INFO_OFFSET(current_rb_ptname),
+			0);
+	}
+}
+
+static int adreno_stop(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	if (!test_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv))
+		return 0;
+
+	adreno_set_active_ctxs_null(adreno_dev);
+
+	adreno_dispatcher_stop(adreno_dev);
+
+	adreno_ringbuffer_stop(adreno_dev);
+
+	kgsl_pwrscale_update_stats(device);
+
+	adreno_irqctrl(adreno_dev, 0);
+
+	adreno_ocmem_free(adreno_dev);
+
+	/* Save active coresight registers if applicable */
+	adreno_coresight_stop(adreno_dev);
+
+	/* Save physical performance counter values before GPU power down */
+	adreno_perfcounter_save(adreno_dev);
+
+	adreno_vbif_clear_pending_transactions(device);
+
+	kgsl_mmu_stop(&device->mmu);
+
+	clear_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv);
+
+	return 0;
+}
+
+static inline bool adreno_try_soft_reset(struct kgsl_device *device, int fault)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	/*
+	 * Do not do soft reset for a IOMMU fault (because the IOMMU hardware
+	 * needs a reset too) or for the A304 because it can't do SMMU
+	 * programming of any kind after a soft reset
+	 */
+
+	if ((fault & ADRENO_IOMMU_PAGE_FAULT) || adreno_is_a304(adreno_dev))
+		return false;
+
+	return true;
+}
+
+/**
+ * adreno_reset() - Helper function to reset the GPU
+ * @device: Pointer to the KGSL device structure for the GPU
+ * @fault: Type of fault. Needed to skip soft reset for MMU fault
+ *
+ * Try to reset the GPU to recover from a fault.  First, try to do a low latency
+ * soft reset.  If the soft reset fails for some reason, then bring out the big
+ * guns and toggle the footswitch.
+ */
+int adreno_reset(struct kgsl_device *device, int fault)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	int ret = -EINVAL;
+	int i = 0;
+
+	/* Try soft reset first */
+	if (adreno_try_soft_reset(device, fault)) {
+		/* Make sure VBIF is cleared before resetting */
+		ret = adreno_vbif_clear_pending_transactions(device);
+
+		if (ret == 0) {
+			ret = adreno_soft_reset(device);
+			if (ret)
+				KGSL_DEV_ERR_ONCE(device,
+					"Device soft reset failed\n");
+		}
+	}
+	if (ret) {
+		/* If soft reset failed/skipped, then pull the power */
+		kgsl_pwrctrl_change_state(device, KGSL_STATE_INIT);
+		/* since device is officially off now clear start bit */
+		clear_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv);
+
+		/* Keep trying to start the device until it works */
+		for (i = 0; i < NUM_TIMES_RESET_RETRY; i++) {
+			ret = adreno_start(device, 0);
+			if (!ret)
+				break;
+
+			msleep(20);
+		}
+	}
+	if (ret)
+		return ret;
+
+	if (i != 0)
+		KGSL_DRV_WARN(device, "Device hard reset tried %d times\n", i);
+
+	/*
+	 * If active_cnt is non-zero then the system was active before
+	 * going into a reset - put it back in that state
+	 */
+
+	if (atomic_read(&device->active_cnt))
+		kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE);
+	else
+		kgsl_pwrctrl_change_state(device, KGSL_STATE_NAP);
+
+	return ret;
+}
+
+static int adreno_getproperty(struct kgsl_device *device,
+				unsigned int type,
+				void __user *value,
+				size_t sizebytes)
+{
+	int status = -EINVAL;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	switch (type) {
+	case KGSL_PROP_DEVICE_INFO:
+		{
+			struct kgsl_devinfo devinfo;
+
+			if (sizebytes != sizeof(devinfo)) {
+				status = -EINVAL;
+				break;
+			}
+
+			memset(&devinfo, 0, sizeof(devinfo));
+			devinfo.device_id = device->id+1;
+			devinfo.chip_id = adreno_dev->chipid;
+			devinfo.mmu_enabled =
+				MMU_FEATURE(&device->mmu, KGSL_MMU_PAGED);
+			devinfo.gmem_gpubaseaddr = adreno_dev->gmem_base;
+			devinfo.gmem_sizebytes = adreno_dev->gmem_size;
+
+			if (copy_to_user(value, &devinfo, sizeof(devinfo)) !=
+					0) {
+				status = -EFAULT;
+				break;
+			}
+			status = 0;
+		}
+		break;
+	case KGSL_PROP_DEVICE_SHADOW:
+		{
+			struct kgsl_shadowprop shadowprop;
+
+			if (sizebytes != sizeof(shadowprop)) {
+				status = -EINVAL;
+				break;
+			}
+			memset(&shadowprop, 0, sizeof(shadowprop));
+			if (device->memstore.hostptr) {
+				/*NOTE: with mmu enabled, gpuaddr doesn't mean
+				 * anything to mmap().
+				 */
+				shadowprop.gpuaddr =
+					(unsigned int) device->memstore.gpuaddr;
+				shadowprop.size = device->memstore.size;
+				/* GSL needs this to be set, even if it
+				 * appears to be meaningless
+				 */
+				shadowprop.flags = KGSL_FLAGS_INITIALIZED |
+					KGSL_FLAGS_PER_CONTEXT_TIMESTAMPS;
+			}
+			if (copy_to_user(value, &shadowprop,
+				sizeof(shadowprop))) {
+				status = -EFAULT;
+				break;
+			}
+			status = 0;
+		}
+		break;
+	case KGSL_PROP_DEVICE_QDSS_STM:
+		{
+			struct kgsl_qdss_stm_prop qdssprop = {0};
+			struct kgsl_memdesc *qdss_desc =
+				kgsl_mmu_get_qdss_global_entry(device);
+
+			if (sizebytes != sizeof(qdssprop)) {
+				status = -EINVAL;
+				break;
+			}
+
+			if (qdss_desc) {
+				qdssprop.gpuaddr = qdss_desc->gpuaddr;
+				qdssprop.size = qdss_desc->size;
+			}
+
+			if (copy_to_user(value, &qdssprop,
+						sizeof(qdssprop))) {
+				status = -EFAULT;
+				break;
+			}
+			status = 0;
+		}
+		break;
+	case KGSL_PROP_MMU_ENABLE:
+		{
+			/* Report MMU only if we can handle paged memory */
+			int mmu_prop = MMU_FEATURE(&device->mmu,
+				KGSL_MMU_PAGED);
+
+			if (sizebytes < sizeof(mmu_prop)) {
+				status = -EINVAL;
+				break;
+			}
+			if (copy_to_user(value, &mmu_prop, sizeof(mmu_prop))) {
+				status = -EFAULT;
+				break;
+			}
+			status = 0;
+		}
+		break;
+	case KGSL_PROP_INTERRUPT_WAITS:
+		{
+			int int_waits = 1;
+
+			if (sizebytes != sizeof(int)) {
+				status = -EINVAL;
+				break;
+			}
+			if (copy_to_user(value, &int_waits, sizeof(int))) {
+				status = -EFAULT;
+				break;
+			}
+			status = 0;
+		}
+		break;
+	case KGSL_PROP_UCHE_GMEM_VADDR:
+		{
+			uint64_t gmem_vaddr = 0;
+
+			if (adreno_is_a5xx(adreno_dev))
+				gmem_vaddr = ADRENO_UCHE_GMEM_BASE;
+			if (sizebytes != sizeof(uint64_t)) {
+				status = -EINVAL;
+				break;
+			}
+			if (copy_to_user(value, &gmem_vaddr,
+					sizeof(uint64_t))) {
+				status = -EFAULT;
+				break;
+			}
+			status = 0;
+		}
+		break;
+	case KGSL_PROP_SP_GENERIC_MEM:
+		{
+			struct kgsl_sp_generic_mem sp_mem;
+
+			if (sizebytes != sizeof(sp_mem)) {
+				status = -EINVAL;
+				break;
+			}
+			memset(&sp_mem, 0, sizeof(sp_mem));
+
+			sp_mem.local = adreno_dev->sp_local_gpuaddr;
+			sp_mem.pvt = adreno_dev->sp_pvt_gpuaddr;
+
+			if (copy_to_user(value, &sp_mem, sizeof(sp_mem))) {
+				status = -EFAULT;
+				break;
+			}
+			status = 0;
+		}
+		break;
+	case KGSL_PROP_UCODE_VERSION:
+		{
+			struct kgsl_ucode_version ucode;
+
+			if (sizebytes != sizeof(ucode)) {
+				status = -EINVAL;
+				break;
+			}
+			memset(&ucode, 0, sizeof(ucode));
+
+			ucode.pfp = adreno_dev->pfp_fw_version;
+			ucode.pm4 = adreno_dev->pm4_fw_version;
+
+			if (copy_to_user(value, &ucode, sizeof(ucode))) {
+				status = -EFAULT;
+				break;
+			}
+			status = 0;
+		}
+		break;
+	case KGSL_PROP_GPMU_VERSION:
+		{
+			struct kgsl_gpmu_version gpmu;
+
+			if (adreno_dev->gpucore == NULL) {
+				status = -EINVAL;
+				break;
+			}
+
+			if (!ADRENO_FEATURE(adreno_dev, ADRENO_GPMU)) {
+				status = -EOPNOTSUPP;
+				break;
+			}
+
+			if (sizebytes != sizeof(gpmu)) {
+				status = -EINVAL;
+				break;
+			}
+			memset(&gpmu, 0, sizeof(gpmu));
+
+			gpmu.major = adreno_dev->gpucore->gpmu_major;
+			gpmu.minor = adreno_dev->gpucore->gpmu_minor;
+			gpmu.features = adreno_dev->gpucore->gpmu_features;
+
+			if (copy_to_user(value, &gpmu, sizeof(gpmu))) {
+				status = -EFAULT;
+				break;
+			}
+			status = 0;
+		}
+		break;
+	case KGSL_PROP_HIGHEST_BANK_BIT:
+		{
+			unsigned int bit;
+
+			if (sizebytes < sizeof(unsigned int)) {
+				status = -EINVAL;
+				break;
+			}
+
+			if (of_property_read_u32(device->pdev->dev.of_node,
+				"qcom,highest-bank-bit", &bit)) {
+				status = -EINVAL;
+				break;
+			}
+
+			if (copy_to_user(value, &bit, sizeof(bit))) {
+				status = -EFAULT;
+				break;
+			}
+		}
+		status = 0;
+		break;
+	case KGSL_PROP_DEVICE_BITNESS:
+	{
+		unsigned int bitness = 32;
+
+		if (sizebytes != sizeof(unsigned int)) {
+			status = -EINVAL;
+			break;
+		}
+		/* No of bits used by the GPU */
+		if (adreno_support_64bit(adreno_dev))
+			bitness = 48;
+
+		if (copy_to_user(value, &bitness,
+				sizeof(unsigned int))) {
+			status = -EFAULT;
+			break;
+		}
+		status = 0;
+	}
+	break;
+
+	default:
+		status = -EINVAL;
+	}
+
+	return status;
+}
+
+int adreno_set_constraint(struct kgsl_device *device,
+				struct kgsl_context *context,
+				struct kgsl_device_constraint *constraint)
+{
+	int status = 0;
+
+	switch (constraint->type) {
+	case KGSL_CONSTRAINT_PWRLEVEL: {
+		struct kgsl_device_constraint_pwrlevel pwr;
+
+		if (constraint->size != sizeof(pwr)) {
+			status = -EINVAL;
+			break;
+		}
+
+		if (copy_from_user(&pwr,
+				(void __user *)constraint->data,
+				sizeof(pwr))) {
+			status = -EFAULT;
+			break;
+		}
+		if (pwr.level >= KGSL_CONSTRAINT_PWR_MAXLEVELS) {
+			status = -EINVAL;
+			break;
+		}
+
+		context->pwr_constraint.type =
+				KGSL_CONSTRAINT_PWRLEVEL;
+		context->pwr_constraint.sub_type = pwr.level;
+		trace_kgsl_user_pwrlevel_constraint(device,
+			context->id,
+			context->pwr_constraint.type,
+			context->pwr_constraint.sub_type);
+		}
+		break;
+	case KGSL_CONSTRAINT_NONE:
+		if (context->pwr_constraint.type == KGSL_CONSTRAINT_PWRLEVEL)
+			trace_kgsl_user_pwrlevel_constraint(device,
+				context->id,
+				KGSL_CONSTRAINT_NONE,
+				context->pwr_constraint.sub_type);
+		context->pwr_constraint.type = KGSL_CONSTRAINT_NONE;
+		break;
+
+	default:
+		status = -EINVAL;
+		break;
+	}
+
+	/* If a new constraint has been set for a context, cancel the old one */
+	if ((status == 0) &&
+		(context->id == device->pwrctrl.constraint.owner_id)) {
+		trace_kgsl_constraint(device, device->pwrctrl.constraint.type,
+					device->pwrctrl.active_pwrlevel, 0);
+		device->pwrctrl.constraint.type = KGSL_CONSTRAINT_NONE;
+	}
+
+	return status;
+}
+
+static int adreno_setproperty(struct kgsl_device_private *dev_priv,
+				unsigned int type,
+				void __user *value,
+				unsigned int sizebytes)
+{
+	int status = -EINVAL;
+	struct kgsl_device *device = dev_priv->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	switch (type) {
+	case KGSL_PROP_PWRCTRL: {
+			unsigned int enable;
+
+			if (sizebytes != sizeof(enable))
+				break;
+
+			if (copy_from_user(&enable, value, sizeof(enable))) {
+				status = -EFAULT;
+				break;
+			}
+
+			mutex_lock(&device->mutex);
+
+			if (enable) {
+				device->pwrctrl.ctrl_flags = 0;
+
+				if (!kgsl_active_count_get(device)) {
+					adreno_fault_detect_start(adreno_dev);
+					kgsl_active_count_put(device);
+				}
+
+				kgsl_pwrscale_enable(device);
+			} else {
+				kgsl_pwrctrl_change_state(device,
+							KGSL_STATE_ACTIVE);
+				device->pwrctrl.ctrl_flags = KGSL_PWR_ON;
+				adreno_fault_detect_stop(adreno_dev);
+				kgsl_pwrscale_disable(device, true);
+			}
+
+			mutex_unlock(&device->mutex);
+			status = 0;
+		}
+		break;
+	case KGSL_PROP_PWR_CONSTRAINT: {
+			struct kgsl_device_constraint constraint;
+			struct kgsl_context *context;
+
+			if (sizebytes != sizeof(constraint))
+				break;
+
+			if (copy_from_user(&constraint, value,
+				sizeof(constraint))) {
+				status = -EFAULT;
+				break;
+			}
+
+			context = kgsl_context_get_owner(dev_priv,
+							constraint.context_id);
+
+			if (context == NULL)
+				break;
+
+			status = adreno_set_constraint(device, context,
+								&constraint);
+
+			kgsl_context_put(context);
+		}
+		break;
+	default:
+		break;
+	}
+
+	return status;
+}
+
+/*
+ * adreno_irq_pending() - Checks if interrupt is generated by h/w
+ * @adreno_dev: Pointer to device whose interrupts are checked
+ *
+ * Returns 1 if an interrupt is pending from the device, else 0.
+ */
+inline unsigned int adreno_irq_pending(struct adreno_device *adreno_dev)
+{
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	unsigned int status;
+
+	adreno_readreg(adreno_dev, ADRENO_REG_RBBM_INT_0_STATUS, &status);
+
+	return (status & gpudev->irq->mask) ? 1 : 0;
+}
+
+
+/**
+ * adreno_hw_isidle() - Check if the GPU core is idle
+ * @adreno_dev: Pointer to the Adreno device structure for the GPU
+ *
+ * Return true if the RBBM status register for the GPU type indicates that the
+ * hardware is idle
+ */
+bool adreno_hw_isidle(struct adreno_device *adreno_dev)
+{
+	const struct adreno_gpu_core *gpucore = adreno_dev->gpucore;
+	unsigned int reg_rbbm_status;
+
+	if (adreno_is_a540(adreno_dev))
+		/*
+		 * Due to CRC idle throttling the GPU idle hysteresis can take
+		 * up to 3 usec to expire - account for it
+		 */
+		udelay(5);
+
+	adreno_readreg(adreno_dev, ADRENO_REG_RBBM_STATUS,
+		&reg_rbbm_status);
+
+	if (reg_rbbm_status & gpucore->busy_mask)
+		return false;
+
+	/* Don't consider ourselves idle if there is an IRQ pending */
+	if (adreno_irq_pending(adreno_dev))
+		return false;
+
+	return true;
+}
+
+/**
+ * adreno_soft_reset() -  Do a soft reset of the GPU hardware
+ * @device: KGSL device to soft reset
+ *
+ * "soft reset" the GPU hardware - this is a fast path GPU reset
+ * The GPU hardware is reset but we never pull power so we can skip
+ * a lot of the standard adreno_stop/adreno_start sequence
+ */
+static int adreno_soft_reset(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	int ret;
+
+	kgsl_pwrctrl_change_state(device, KGSL_STATE_AWARE);
+	adreno_set_active_ctxs_null(adreno_dev);
+
+	adreno_irqctrl(adreno_dev, 0);
+
+	adreno_clear_gpu_fault(adreno_dev);
+	/* since device is officially off now clear start bit */
+	clear_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv);
+
+	/* save physical performance counter values before GPU soft reset */
+	adreno_perfcounter_save(adreno_dev);
+
+	/* Reset the GPU */
+	_soft_reset(adreno_dev);
+
+	/* Set the page table back to the default page table */
+	adreno_ringbuffer_set_global(adreno_dev, 0);
+	kgsl_mmu_set_pt(&device->mmu, device->mmu.defaultpagetable);
+
+	_set_secvid(device);
+
+	/* Enable 64 bit gpu addr if feature is set */
+	if (gpudev->enable_64bit &&
+			adreno_support_64bit(adreno_dev))
+		gpudev->enable_64bit(adreno_dev);
+
+
+	/* Reinitialize the GPU */
+	gpudev->start(adreno_dev);
+
+	/* Re-initialize the coresight registers if applicable */
+	adreno_coresight_start(adreno_dev);
+
+	/* Enable IRQ */
+	adreno_irqctrl(adreno_dev, 1);
+
+	/* stop all ringbuffers to cancel RB events */
+	adreno_ringbuffer_stop(adreno_dev);
+	/*
+	 * If we have offsets for the jump tables we can try to do a warm start,
+	 * otherwise do a full ringbuffer restart
+	 */
+
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_WARM_START))
+		ret = adreno_ringbuffer_start(adreno_dev, ADRENO_START_WARM);
+	else
+		ret = adreno_ringbuffer_start(adreno_dev, ADRENO_START_COLD);
+	if (ret == 0) {
+		device->reset_counter++;
+		set_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv);
+	}
+
+	/* Restore physical performance counter values after soft reset */
+	adreno_perfcounter_restore(adreno_dev);
+
+	return ret;
+}
+
+/*
+ * adreno_isidle() - return true if the GPU hardware is idle
+ * @device: Pointer to the KGSL device structure for the GPU
+ *
+ * Return true if the GPU hardware is idle and there are no commands pending in
+ * the ringbuffer
+ */
+bool adreno_isidle(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct adreno_ringbuffer *rb;
+	int i;
+
+	if (!kgsl_state_is_awake(device))
+		return true;
+
+	/*
+	 * wptr is updated when we add commands to the ringbuffer; add a
+	 * barrier to make sure the updated wptr is compared against rptr
+	 */
+	smp_mb();
+
+	/*
+	 * The GPU is truly idle only when the read and write pointers of
+	 * every ringbuffer are equal
+	 */
+
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+		if (!adreno_rb_empty(rb))
+			return false;
+	}
+
+	return adreno_hw_isidle(adreno_dev);
+}
+
+/**
+ * adreno_spin_idle() - Spin wait for the GPU to idle
+ * @adreno_dev: Pointer to an adreno device
+ * @timeout: milliseconds to wait before returning error
+ *
+ * Spin the CPU waiting for the RBBM status to return idle
+ */
+int adreno_spin_idle(struct adreno_device *adreno_dev, unsigned int timeout)
+{
+	unsigned long wait = jiffies + msecs_to_jiffies(timeout);
+
+	do {
+		/*
+		 * If we fault, stop waiting and return an error. The dispatcher
+		 * will clean up the fault from the work queue, but we need to
+		 * make sure we don't block it by waiting for an idle that
+		 * will never come.
+		 */
+
+		if (adreno_gpu_fault(adreno_dev) != 0)
+			return -EDEADLK;
+
+		if (adreno_isidle(KGSL_DEVICE(adreno_dev)))
+			return 0;
+
+	} while (time_before(jiffies, wait));
+
+	/*
+	 * Under rare conditions, preemption can cause the while loop to exit
+	 * without checking if the GPU is idle. Check one last time before we
+	 * return failure.
+	 */
+	if (adreno_gpu_fault(adreno_dev) != 0)
+		return -EDEADLK;
+
+	if (adreno_isidle(KGSL_DEVICE(adreno_dev)))
+		return 0;
+
+	return -ETIMEDOUT;
+}
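+
+/*
+ * Note: adreno_idle() below calls this with ADRENO_IDLE_TIMEOUT after the
+ * dispatcher has finished completing already submitted commands.
+ */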
+
+/**
+ * adreno_idle() - wait for the GPU hardware to go idle
+ * @device: Pointer to the KGSL device structure for the GPU
+ *
+ * Wait up to ADRENO_IDLE_TIMEOUT milliseconds for the GPU hardware to go quiet.
+ * Caller must hold the device mutex, and must not hold the dispatcher mutex.
+ */
+
+int adreno_idle(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	int ret;
+
+	/*
+	 * Make sure the device mutex is held so the dispatcher can't send any
+	 * more commands to the hardware
+	 */
+
+	if (WARN_ON(!mutex_is_locked(&device->mutex)))
+		return -EDEADLK;
+
+	/* Check if we are already idle before idling dispatcher */
+	if (adreno_isidle(device))
+		return 0;
+	/*
+	 * Wait for dispatcher to finish completing commands
+	 * already submitted
+	 */
+	ret = adreno_dispatcher_idle(adreno_dev);
+	if (ret)
+		return ret;
+
+	return adreno_spin_idle(adreno_dev, ADRENO_IDLE_TIMEOUT);
+}
+
+/**
+ * adreno_drain() - Drain the dispatch queue
+ * @device: Pointer to the KGSL device structure for the GPU
+ *
+ * Drain the dispatcher of existing drawobjs.  This halts
+ * additional commands from being issued until the gate is completed.
+ */
+static int adreno_drain(struct kgsl_device *device)
+{
+	reinit_completion(&device->halt_gate);
+
+	return 0;
+}
+
+/* Caller must hold the device mutex. */
+static int adreno_suspend_context(struct kgsl_device *device)
+{
+	/* process any profiling results that are available */
+	adreno_profile_process_results(ADRENO_DEVICE(device));
+
+	/* Wait for the device to go idle */
+	return adreno_idle(device);
+}
+
+/**
+ * adreno_read - General read function to read adreno device memory
+ * @device - Pointer to the GPU device struct (for adreno device)
+ * @base - Base address (kernel virtual) where the device memory is mapped
+ * @offsetwords - Offset in words from the base address, of the memory that
+ * is to be read
+ * @value - Value read from the device memory
+ * @mem_len - Length of the device memory mapped to the kernel
+ */
+static void adreno_read(struct kgsl_device *device, void __iomem *base,
+		unsigned int offsetwords, unsigned int *value,
+		unsigned int mem_len)
+{
+
+	void __iomem *reg;
+
+	/* Make sure we're not reading from invalid memory */
+	if (WARN(offsetwords * sizeof(uint32_t) >= mem_len,
+		"Out of bounds register read: 0x%x/0x%x\n",
+			offsetwords, mem_len >> 2))
+		return;
+
+	reg = (base + (offsetwords << 2));
+
+	if (!in_interrupt())
+		kgsl_pre_hwaccess(device);
+
+	*value = __raw_readl(reg);
+	/*
+	 * ensure this read finishes before the next one.
+	 * i.e. act like normal readl()
+	 */
+	rmb();
+}
+
+/**
+ * adreno_regread - Used to read adreno device registers
+ * @device - Pointer to the KGSL device whose register is to be read
+ * @offsetwords - Word (4 byte) offset of the register to be read
+ * @value - Value read from the device register
+ */
+static void adreno_regread(struct kgsl_device *device, unsigned int offsetwords,
+	unsigned int *value)
+{
+	adreno_read(device, device->reg_virt, offsetwords, value,
+						device->reg_len);
+}
+
+/**
+ * adreno_shadermem_regread - Used to read GPU (adreno) shader memory
+ * @device - GPU device whose shader memory is to be read
+ * @offsetwords - Offset in words, of the shader memory address to be read
+ * @value - Pointer to where the read shader mem value is to be stored
+ */
+void adreno_shadermem_regread(struct kgsl_device *device,
+	unsigned int offsetwords, unsigned int *value)
+{
+	adreno_read(device, device->shader_mem_virt, offsetwords, value,
+					device->shader_mem_len);
+}
+
+static void adreno_regwrite(struct kgsl_device *device,
+				unsigned int offsetwords,
+				unsigned int value)
+{
+	void __iomem *reg;
+
+	/* Make sure we're not writing to an invalid register */
+	if (WARN(offsetwords * sizeof(uint32_t) >= device->reg_len,
+		"Out of bounds register write: 0x%x/0x%x\n",
+			offsetwords, device->reg_len >> 2))
+		return;
+
+	if (!in_interrupt())
+		kgsl_pre_hwaccess(device);
+
+	trace_kgsl_regwrite(device, offsetwords, value);
+
+	reg = (device->reg_virt + (offsetwords << 2));
+
+	/*
+	 * ensure previous writes post before this one,
+	 * i.e. act like normal writel()
+	 */
+	wmb();
+	__raw_writel(value, reg);
+}
+
+/**
+ * adreno_waittimestamp - sleep while waiting for the specified timestamp
+ * @device - pointer to a KGSL device structure
+ * @context - pointer to the active kgsl context
+ * @timestamp - GPU timestamp to wait for
+ * @msecs - amount of time to wait (in milliseconds)
+ *
+ * Wait up to 'msecs' milliseconds for the specified timestamp to expire.
+ */
+static int adreno_waittimestamp(struct kgsl_device *device,
+		struct kgsl_context *context,
+		unsigned int timestamp,
+		unsigned int msecs)
+{
+	int ret;
+
+	if (context == NULL) {
+		/* Waiting without a context is deprecated - complain once */
+		dev_WARN_ONCE(device->dev, 1,
+			"IOCTL_KGSL_DEVICE_WAITTIMESTAMP is deprecated\n");
+		return -ENOTTY;
+	}
+
+	/* Return -ENOENT if the context has been detached */
+	if (kgsl_context_detached(context))
+		return -ENOENT;
+
+	ret = adreno_drawctxt_wait(ADRENO_DEVICE(device), context,
+		timestamp, msecs);
+
+	/* If the context got invalidated then return a specific error */
+	if (kgsl_context_invalid(context))
+		ret = -EDEADLK;
+
+	/*
+	 * Return -EPROTO if the device has faulted since the last time we
+	 * checked.  Userspace uses this as a marker for performing post
+	 * fault activities
+	 */
+
+	if (!ret && test_and_clear_bit(ADRENO_CONTEXT_FAULT, &context->priv))
+		ret = -EPROTO;
+
+	return ret;
+}
+
+/**
+ * __adreno_readtimestamp() - Reads the timestamp from memstore memory
+ * @adreno_dev: Pointer to an adreno device
+ * @index: Index into the memstore memory
+ * @type: Type of timestamp to read
+ * @timestamp: The out parameter where the timestamp is read
+ */
+static int __adreno_readtimestamp(struct adreno_device *adreno_dev, int index,
+				int type, unsigned int *timestamp)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int status = 0;
+
+	switch (type) {
+	case KGSL_TIMESTAMP_CONSUMED:
+		kgsl_sharedmem_readl(&device->memstore, timestamp,
+			KGSL_MEMSTORE_OFFSET(index, soptimestamp));
+		break;
+	case KGSL_TIMESTAMP_RETIRED:
+		kgsl_sharedmem_readl(&device->memstore, timestamp,
+			KGSL_MEMSTORE_OFFSET(index, eoptimestamp));
+		break;
+	default:
+		status = -EINVAL;
+		*timestamp = 0;
+		break;
+	}
+	return status;
+}
+
+/**
+ * adreno_rb_readtimestamp(): Return the value of given type of timestamp
+ * for a RB
+ * @adreno_dev: adreno device whose timestamp values are being queried
+ * @priv: The object being queried for a timestamp (expected to be a
+ * ringbuffer pointer)
+ * @type: The type of timestamp (one of 3) to be read
+ * @timestamp: Pointer to where the read timestamp is to be written to
+ *
+ * CONSUMED and RETIRED type timestamps are sorted by id and are constantly
+ * updated by the GPU through shared memstore memory. QUEUED type timestamps
+ * are read directly from the ringbuffer struct.
+ *
+ * The function returns 0 on success with the timestamp value written to the
+ * *timestamp address, or -EINVAL on a read error or invalid type, in which
+ * case *timestamp is set to 0.
+ */
+int adreno_rb_readtimestamp(struct adreno_device *adreno_dev,
+		void *priv, enum kgsl_timestamp_type type,
+		unsigned int *timestamp)
+{
+	int status = 0;
+	struct adreno_ringbuffer *rb = priv;
+
+	if (type == KGSL_TIMESTAMP_QUEUED)
+		*timestamp = rb->timestamp;
+	else
+		status = __adreno_readtimestamp(adreno_dev,
+				rb->id + KGSL_MEMSTORE_MAX,
+				type, timestamp);
+
+	return status;
+}
+
+/**
+ * adreno_readtimestamp(): Return the value of given type of timestamp
+ * @device: GPU device whose timestamp values are being queried
+ * @priv: The object being queried for a timestamp (expected to be a context)
+ * @type: The type of timestamp (one of 3) to be read
+ * @timestamp: Pointer to where the read timestamp is to be written to
+ *
+ * CONSUMED and RETIRED type timestamps are sorted by id and are constantly
+ * updated by the GPU through shared memstore memory. QUEUED type timestamps
+ * are read directly from the context struct.
+ *
+ * The function returns 0 on success with the timestamp value written to the
+ * *timestamp address, or -EINVAL on a read error or invalid type, in which
+ * case *timestamp is set to 0.
+ */
+static int adreno_readtimestamp(struct kgsl_device *device,
+		void *priv, enum kgsl_timestamp_type type,
+		unsigned int *timestamp)
+{
+	int status = 0;
+	struct kgsl_context *context = priv;
+
+	if (type == KGSL_TIMESTAMP_QUEUED) {
+		struct adreno_context *ctxt = ADRENO_CONTEXT(context);
+
+		*timestamp = ctxt->timestamp;
+	} else
+		status = __adreno_readtimestamp(ADRENO_DEVICE(device),
+				context->id, type, timestamp);
+
+	return status;
+}
+
+static inline s64 adreno_ticks_to_us(u32 ticks, u32 freq)
+{
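+	/*
+	 * freq is in Hz, so freq / 1000000 is ticks per microsecond;
+	 * e.g. at 300000000 Hz the busy ticks are divided by 300.
+	 */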
+	freq /= 1000000;
+	return ticks / freq;
+}
+
+/**
+ * adreno_power_stats() - Reads the counters needed for freq decisions
+ * @device: Pointer to device whose counters are read
+ * @stats: Pointer to stats set that needs updating
+ * Power: The caller is expected to be in a clock enabled state as this
+ * function does reg reads
+ */
+static void adreno_power_stats(struct kgsl_device *device,
+				struct kgsl_power_stats *stats)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct adreno_gpudev *gpudev  = ADRENO_GPU_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct adreno_busy_data *busy = &adreno_dev->busy_data;
+	uint64_t adj = 0;
+
+	memset(stats, 0, sizeof(*stats));
+
+	/* Get the busy cycles counted since the counter was last reset */
+	if (adreno_dev->perfctr_pwr_lo != 0) {
+		uint64_t gpu_busy;
+
+		gpu_busy = counter_delta(device, adreno_dev->perfctr_pwr_lo,
+			&busy->gpu_busy);
+
+		if (gpudev->read_throttling_counters) {
+			adj = gpudev->read_throttling_counters(adreno_dev);
+			gpu_busy += adj;
+		}
+
+		stats->busy_time = adreno_ticks_to_us(gpu_busy,
+			kgsl_pwrctrl_active_freq(pwr));
+	}
+
+	if (device->pwrctrl.bus_control) {
+		uint64_t ram_cycles = 0, starved_ram = 0;
+
+		if (adreno_dev->ram_cycles_lo != 0)
+			ram_cycles = counter_delta(device,
+				adreno_dev->ram_cycles_lo,
+				&busy->vbif_ram_cycles);
+
+		if (adreno_dev->starved_ram_lo != 0)
+			starved_ram = counter_delta(device,
+				adreno_dev->starved_ram_lo,
+				&busy->vbif_starved_ram);
+
+		stats->ram_time = ram_cycles;
+		stats->ram_wait = starved_ram;
+	}
+	if (adreno_dev->lm_threshold_count &&
+			gpudev->count_throttles)
+		gpudev->count_throttles(adreno_dev, adj);
+}
+
+static unsigned int adreno_gpuid(struct kgsl_device *device,
+	unsigned int *chipid)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	/*
+	 * Some applications need to know the chip ID too, so pass
+	 * that as a parameter
+	 */
+
+	if (chipid != NULL)
+		*chipid = adreno_dev->chipid;
+
+	/*
+	 * Standard KGSL gpuid format:
+	 * Top word is 0x0002 for 2D or 0x0003 for 3D
+	 * Bottom word is a core specific identifier
+	 */
+
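+	/* e.g. an A530 (gpurev 530 == 0x212) yields 0x00030212 */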
+	return (0x0003 << 16) | ADRENO_GPUREV(adreno_dev);
+}
+
+static int adreno_regulator_enable(struct kgsl_device *device)
+{
+	int ret = 0;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct adreno_gpudev *gpudev  = ADRENO_GPU_DEVICE(adreno_dev);
+
+	if (gpudev->regulator_enable &&
+		!test_bit(ADRENO_DEVICE_GPU_REGULATOR_ENABLED,
+			&adreno_dev->priv)) {
+		ret = gpudev->regulator_enable(adreno_dev);
+		if (!ret)
+			set_bit(ADRENO_DEVICE_GPU_REGULATOR_ENABLED,
+				&adreno_dev->priv);
+	}
+	return ret;
+}
+
+static bool adreno_is_hw_collapsible(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct adreno_gpudev *gpudev  = ADRENO_GPU_DEVICE(adreno_dev);
+
+	/*
+	 * Skip power collapse for A304 if the power ctrl flag is set to a
+	 * non-zero value: soft_reset does not work on A304, so power collapse
+	 * needs to be disabled to avoid a soft_reset.
+	 */
+	if (adreno_is_a304(adreno_dev) &&
+			device->pwrctrl.ctrl_flags)
+		return false;
+
+	return adreno_isidle(device) && (gpudev->is_sptp_idle ?
+				gpudev->is_sptp_idle(adreno_dev) : true);
+}
+
+static void adreno_regulator_disable(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct adreno_gpudev *gpudev  = ADRENO_GPU_DEVICE(adreno_dev);
+
+	if (gpudev->regulator_disable &&
+		test_bit(ADRENO_DEVICE_GPU_REGULATOR_ENABLED,
+			&adreno_dev->priv)) {
+		gpudev->regulator_disable(adreno_dev);
+		clear_bit(ADRENO_DEVICE_GPU_REGULATOR_ENABLED,
+			&adreno_dev->priv);
+	}
+}
+
+static void adreno_pwrlevel_change_settings(struct kgsl_device *device,
+		unsigned int prelevel, unsigned int postlevel, bool post)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct adreno_gpudev *gpudev  = ADRENO_GPU_DEVICE(adreno_dev);
+
+	if (gpudev->pwrlevel_change_settings)
+		gpudev->pwrlevel_change_settings(adreno_dev, prelevel,
+					postlevel, post);
+}
+
+static void adreno_clk_set_options(struct kgsl_device *device, const char *name,
+	struct clk *clk)
+{
+	if (ADRENO_GPU_DEVICE(ADRENO_DEVICE(device))->clk_set_options)
+		ADRENO_GPU_DEVICE(ADRENO_DEVICE(device))->clk_set_options(
+			ADRENO_DEVICE(device), name, clk);
+}
+
+static void adreno_iommu_sync(struct kgsl_device *device, bool sync)
+{
+	struct scm_desc desc = {0};
+	int ret;
+
+	if (sync == true) {
+		mutex_lock(&kgsl_mmu_sync);
+		desc.args[0] = true;
+		desc.arginfo = SCM_ARGS(1);
+		ret = scm_call2_atomic(SCM_SIP_FNID(SCM_SVC_PWR, 0x8), &desc);
+		if (ret)
+			KGSL_DRV_ERR(device,
+				"MMU sync with Hypervisor off %x\n", ret);
+	} else {
+		desc.args[0] = false;
+		desc.arginfo = SCM_ARGS(1);
+		scm_call2_atomic(SCM_SIP_FNID(SCM_SVC_PWR, 0x8), &desc);
+		mutex_unlock(&kgsl_mmu_sync);
+	}
+}
+
+static void _regulator_disable(struct kgsl_regulator *regulator, bool poll)
+{
+	unsigned long wait_time = jiffies + msecs_to_jiffies(200);
+
+	if (IS_ERR_OR_NULL(regulator->reg))
+		return;
+
+	regulator_disable(regulator->reg);
+
+	if (poll == false)
+		return;
+
+	while (!time_after(jiffies, wait_time)) {
+		if (!regulator_is_enabled(regulator->reg))
+			return;
+		cpu_relax();
+	}
+
+	KGSL_CORE_ERR("regulator '%s' still on after 200ms\n", regulator->name);
+}
+
+static void adreno_regulator_disable_poll(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	int i;
+
+	/* Fast path - hopefully we don't need this quirk */
+	if (!ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_IOMMU_SYNC)) {
+		for (i = KGSL_MAX_REGULATORS - 1; i >= 0; i--)
+			_regulator_disable(&pwr->regulators[i], false);
+		return;
+	}
+
+	adreno_iommu_sync(device, true);
+
+	for (i = 0; i < KGSL_MAX_REGULATORS; i++)
+		_regulator_disable(&pwr->regulators[i], true);
+
+	adreno_iommu_sync(device, false);
+}
+
+static void adreno_gpu_model(struct kgsl_device *device, char *str,
+				size_t bufsz)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
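+	/*
+	 * The patch field is reported one-based, so e.g. a (hypothetical)
+	 * chipid of 0x05030000 is reported as "Adreno530v1".
+	 */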
+	snprintf(str, bufsz, "Adreno%d%d%dv%d",
+			ADRENO_CHIPID_CORE(adreno_dev->chipid),
+			 ADRENO_CHIPID_MAJOR(adreno_dev->chipid),
+			 ADRENO_CHIPID_MINOR(adreno_dev->chipid),
+			 ADRENO_CHIPID_PATCH(adreno_dev->chipid) + 1);
+}
+
+static const struct kgsl_functable adreno_functable = {
+	/* Mandatory functions */
+	.regread = adreno_regread,
+	.regwrite = adreno_regwrite,
+	.idle = adreno_idle,
+	.isidle = adreno_isidle,
+	.suspend_context = adreno_suspend_context,
+	.init = adreno_init,
+	.start = adreno_start,
+	.stop = adreno_stop,
+	.getproperty = adreno_getproperty,
+	.getproperty_compat = adreno_getproperty_compat,
+	.waittimestamp = adreno_waittimestamp,
+	.readtimestamp = adreno_readtimestamp,
+	.queue_cmds = adreno_dispatcher_queue_cmds,
+	.ioctl = adreno_ioctl,
+	.compat_ioctl = adreno_compat_ioctl,
+	.power_stats = adreno_power_stats,
+	.gpuid = adreno_gpuid,
+	.snapshot = adreno_snapshot,
+	.irq_handler = adreno_irq_handler,
+	.drain = adreno_drain,
+	/* Optional functions */
+	.drawctxt_create = adreno_drawctxt_create,
+	.drawctxt_detach = adreno_drawctxt_detach,
+	.drawctxt_destroy = adreno_drawctxt_destroy,
+	.drawctxt_dump = adreno_drawctxt_dump,
+	.setproperty = adreno_setproperty,
+	.setproperty_compat = adreno_setproperty_compat,
+	.drawctxt_sched = adreno_drawctxt_sched,
+	.resume = adreno_dispatcher_start,
+	.regulator_enable = adreno_regulator_enable,
+	.is_hw_collapsible = adreno_is_hw_collapsible,
+	.regulator_disable = adreno_regulator_disable,
+	.pwrlevel_change_settings = adreno_pwrlevel_change_settings,
+	.regulator_disable_poll = adreno_regulator_disable_poll,
+	.clk_set_options = adreno_clk_set_options,
+	.gpu_model = adreno_gpu_model,
+};
+
+static struct platform_driver adreno_platform_driver = {
+	.probe = adreno_probe,
+	.remove = adreno_remove,
+	.suspend = kgsl_suspend_driver,
+	.resume = kgsl_resume_driver,
+	.id_table = adreno_id_table,
+	.driver = {
+		.owner = THIS_MODULE,
+		.name = DEVICE_3D_NAME,
+		.pm = &kgsl_pm_ops,
+		.of_match_table = adreno_match_table,
+	}
+};
+
+static const struct of_device_id busmon_match_table[] = {
+	{ .compatible = "qcom,kgsl-busmon", .data = &device_3d0 },
+	{}
+};
+
+static int adreno_busmon_probe(struct platform_device *pdev)
+{
+	struct kgsl_device *device;
+	const struct of_device_id *pdid =
+			of_match_device(busmon_match_table, &pdev->dev);
+
+	if (pdid == NULL)
+		return -ENXIO;
+
+	device = (struct kgsl_device *)pdid->data;
+	device->busmondev = &pdev->dev;
+	dev_set_drvdata(device->busmondev, device);
+
+	return 0;
+}
+
+static struct platform_driver kgsl_bus_platform_driver = {
+	.probe = adreno_busmon_probe,
+	.driver = {
+		.owner = THIS_MODULE,
+		.name = "kgsl-busmon",
+		.of_match_table = busmon_match_table,
+	}
+};
+
+static int __init kgsl_3d_init(void)
+{
+	int ret;
+
+	ret = platform_driver_register(&kgsl_bus_platform_driver);
+	if (ret)
+		return ret;
+
+	ret = platform_driver_register(&adreno_platform_driver);
+	if (ret)
+		platform_driver_unregister(&kgsl_bus_platform_driver);
+
+	return ret;
+}
+
+static void __exit kgsl_3d_exit(void)
+{
+	platform_driver_unregister(&adreno_platform_driver);
+	platform_driver_unregister(&kgsl_bus_platform_driver);
+}
+
+module_init(kgsl_3d_init);
+module_exit(kgsl_3d_exit);
+
+MODULE_DESCRIPTION("3D Graphics driver");
+MODULE_LICENSE("GPL v2");
+MODULE_ALIAS("platform:kgsl_3d");
diff --git a/drivers/gpu/msm/adreno.h b/drivers/gpu/msm/adreno.h
new file mode 100644
index 0000000..a14cf58
--- /dev/null
+++ b/drivers/gpu/msm/adreno.h
@@ -0,0 +1,1545 @@
+/* Copyright (c) 2008-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef __ADRENO_H
+#define __ADRENO_H
+
+#include "kgsl_device.h"
+#include "kgsl_sharedmem.h"
+#include "adreno_drawctxt.h"
+#include "adreno_ringbuffer.h"
+#include "adreno_profile.h"
+#include "adreno_dispatch.h"
+#include "kgsl_iommu.h"
+#include "adreno_perfcounter.h"
+#include <linux/stat.h>
+#include <linux/delay.h>
+
+#include "a4xx_reg.h"
+
+#ifdef CONFIG_QCOM_OCMEM
+#include <soc/qcom/ocmem.h>
+#endif
+
+#define DEVICE_3D_NAME "kgsl-3d"
+#define DEVICE_3D0_NAME "kgsl-3d0"
+
+/* ADRENO_DEVICE - Given a kgsl_device return the adreno device struct */
+#define ADRENO_DEVICE(device) \
+		container_of(device, struct adreno_device, dev)
+
+/* KGSL_DEVICE - given an adreno_device, return the KGSL device struct */
+#define KGSL_DEVICE(_dev) (&((_dev)->dev))
+
+/* ADRENO_CONTEXT - Given a context return the adreno context struct */
+#define ADRENO_CONTEXT(context) \
+		container_of(context, struct adreno_context, base)
+
+/* ADRENO_GPU_DEVICE - Given an adreno device return the GPU specific struct */
+#define ADRENO_GPU_DEVICE(_a) ((_a)->gpucore->gpudev)
+
+#define ADRENO_CHIPID_CORE(_id) (((_id) >> 24) & 0xFF)
+#define ADRENO_CHIPID_MAJOR(_id) (((_id) >> 16) & 0xFF)
+#define ADRENO_CHIPID_MINOR(_id) (((_id) >> 8) & 0xFF)
+#define ADRENO_CHIPID_PATCH(_id) ((_id) & 0xFF)
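+/* e.g. a chipid of 0x05030001 decodes as core 5, major 3, minor 0, patch 1 */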
+
+/* ADRENO_GPUREV - Return the GPU ID for the given adreno_device */
+#define ADRENO_GPUREV(_a) ((_a)->gpucore->gpurev)
+
+/*
+ * ADRENO_FEATURE - return true if the specified feature is supported by the GPU
+ * core
+ */
+#define ADRENO_FEATURE(_dev, _bit) \
+	((_dev)->gpucore->features & (_bit))
+
+/**
+ * ADRENO_QUIRK - return true if the specified quirk is required by the GPU
+ */
+#define ADRENO_QUIRK(_dev, _bit) \
+	((_dev)->quirks & (_bit))
+
+/*
+ * ADRENO_PREEMPT_STYLE - return preemption style
+ */
+#define ADRENO_PREEMPT_STYLE(flags) \
+	((flags & KGSL_CONTEXT_PREEMPT_STYLE_MASK) >> \
+		  KGSL_CONTEXT_PREEMPT_STYLE_SHIFT)
+
+/*
+ * return the dispatcher drawqueue in which the given drawobj should
+ * be submitted
+ */
+#define ADRENO_DRAWOBJ_DISPATCH_DRAWQUEUE(c)	\
+	(&((ADRENO_CONTEXT(c->context))->rb->dispatch_q))
+
+#define ADRENO_DRAWOBJ_RB(c)			\
+	((ADRENO_CONTEXT(c->context))->rb)
+
+/* Adreno core features */
+/* The core uses OCMEM for GMEM/binning memory */
+#define ADRENO_USES_OCMEM     BIT(0)
+/* The core supports an accelerated warm start */
+#define ADRENO_WARM_START     BIT(1)
+/* The core supports the microcode bootstrap functionality */
+#define ADRENO_USE_BOOTSTRAP  BIT(2)
+/* The core supports SP/TP hw controlled power collapse */
+#define ADRENO_SPTP_PC BIT(3)
+/* The core supports Peak Power Detection(PPD)*/
+#define ADRENO_PPD BIT(4)
+/* The GPU supports content protection */
+#define ADRENO_CONTENT_PROTECTION BIT(5)
+/* The GPU supports preemption */
+#define ADRENO_PREEMPTION BIT(6)
+/* The core uses GPMU for power and limit management */
+#define ADRENO_GPMU BIT(7)
+/* The GPMU supports Limits Management */
+#define ADRENO_LM BIT(8)
+/* The core uses 64 bit GPU addresses */
+#define ADRENO_64BIT BIT(9)
+/* The GPU supports retention for cpz registers */
+#define ADRENO_CPZ_RETENTION BIT(10)
+
+/*
+ * Adreno GPU quirks - control bits for various workarounds
+ */
+
+/* Set TWOPASSUSEWFI in PC_DBG_ECO_CNTL (5XX) */
+#define ADRENO_QUIRK_TWO_PASS_USE_WFI BIT(0)
+/* Lock/unlock mutex to sync with the IOMMU */
+#define ADRENO_QUIRK_IOMMU_SYNC BIT(1)
+/* Submit critical packets at GPU wake up */
+#define ADRENO_QUIRK_CRITICAL_PACKETS BIT(2)
+/* Mask out RB1-3 activity signals from HW hang detection logic */
+#define ADRENO_QUIRK_FAULT_DETECT_MASK BIT(3)
+/* Disable RB sampler datapath clock gating optimization */
+#define ADRENO_QUIRK_DISABLE_RB_DP2CLOCKGATING BIT(4)
+/* Disable local memory(LM) feature to avoid corner case error */
+#define ADRENO_QUIRK_DISABLE_LMLOADKILL BIT(5)
+
+/* Flags to control command packet settings */
+#define KGSL_CMD_FLAGS_NONE             0
+#define KGSL_CMD_FLAGS_PMODE		BIT(0)
+#define KGSL_CMD_FLAGS_INTERNAL_ISSUE   BIT(1)
+#define KGSL_CMD_FLAGS_WFI              BIT(2)
+#define KGSL_CMD_FLAGS_PROFILE		BIT(3)
+#define KGSL_CMD_FLAGS_PWRON_FIXUP      BIT(4)
+
+/* Command identifiers */
+#define KGSL_CONTEXT_TO_MEM_IDENTIFIER	0x2EADBEEF
+#define KGSL_CMD_IDENTIFIER		0x2EEDFACE
+#define KGSL_CMD_INTERNAL_IDENTIFIER	0x2EEDD00D
+#define KGSL_START_OF_IB_IDENTIFIER	0x2EADEABE
+#define KGSL_END_OF_IB_IDENTIFIER	0x2ABEDEAD
+#define KGSL_START_OF_PROFILE_IDENTIFIER	0x2DEFADE1
+#define KGSL_END_OF_PROFILE_IDENTIFIER	0x2DEFADE2
+#define KGSL_PWRON_FIXUP_IDENTIFIER	0x2AFAFAFA
+
+/* One cannot wait forever for the core to idle, so set an upper limit on the
+ * amount of time to wait for it to go idle
+ */
+
+#define ADRENO_IDLE_TIMEOUT (20 * 1000)
+
+#define ADRENO_UCHE_GMEM_BASE	0x100000
+
+enum adreno_gpurev {
+	ADRENO_REV_UNKNOWN = 0,
+	ADRENO_REV_A304 = 304,
+	ADRENO_REV_A305 = 305,
+	ADRENO_REV_A305C = 306,
+	ADRENO_REV_A306 = 307,
+	ADRENO_REV_A306A = 308,
+	ADRENO_REV_A310 = 310,
+	ADRENO_REV_A320 = 320,
+	ADRENO_REV_A330 = 330,
+	ADRENO_REV_A305B = 335,
+	ADRENO_REV_A405 = 405,
+	ADRENO_REV_A418 = 418,
+	ADRENO_REV_A420 = 420,
+	ADRENO_REV_A430 = 430,
+	ADRENO_REV_A505 = 505,
+	ADRENO_REV_A506 = 506,
+	ADRENO_REV_A510 = 510,
+	ADRENO_REV_A512 = 512,
+	ADRENO_REV_A530 = 530,
+	ADRENO_REV_A540 = 540,
+};
+
+#define ADRENO_START_WARM 0
+#define ADRENO_START_COLD 1
+
+#define ADRENO_SOFT_FAULT BIT(0)
+#define ADRENO_HARD_FAULT BIT(1)
+#define ADRENO_TIMEOUT_FAULT BIT(2)
+#define ADRENO_IOMMU_PAGE_FAULT BIT(3)
+#define ADRENO_PREEMPT_FAULT BIT(4)
+
+#define ADRENO_SPTP_PC_CTRL 0
+#define ADRENO_PPD_CTRL     1
+#define ADRENO_LM_CTRL      2
+#define ADRENO_HWCG_CTRL    3
+#define ADRENO_THROTTLING_CTRL 4
+
+
+/* number of throttle counters for DCVS adjustment */
+#define ADRENO_GPMU_THROTTLE_COUNTERS 4
+/* base for throttle counters */
+#define ADRENO_GPMU_THROTTLE_COUNTERS_BASE_REG 43
+
+struct adreno_gpudev;
+
+/* Time to allow preemption to complete (in ms) */
+#define ADRENO_PREEMPT_TIMEOUT 10000
+
+#define ADRENO_INT_BIT(a, _bit) (((a)->gpucore->gpudev->int_bits) ? \
+		(adreno_get_int(a, _bit) < 0 ? 0 : \
+		BIT(adreno_get_int(a, _bit))) : 0)
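+/*
+ * i.e. BIT(n) for the target specific interrupt bit mapped to _bit, or 0 if
+ * the target has no int_bits table or does not define that bit.
+ */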
+
+/**
+ * enum adreno_preempt_states
+ * ADRENO_PREEMPT_NONE: No preemption is scheduled
+ * ADRENO_PREEMPT_START: The S/W has started a preemption
+ * ADRENO_PREEMPT_TRIGGERED: A preempt has been triggered in the HW
+ * ADRENO_PREEMPT_FAULTED: The preempt timer has fired
+ * ADRENO_PREEMPT_PENDING: The H/W has signaled preemption complete
+ * ADRENO_PREEMPT_COMPLETE: Preemption could not be finished in the IRQ handler,
+ * worker has been scheduled
+ */
+enum adreno_preempt_states {
+	ADRENO_PREEMPT_NONE = 0,
+	ADRENO_PREEMPT_START,
+	ADRENO_PREEMPT_TRIGGERED,
+	ADRENO_PREEMPT_FAULTED,
+	ADRENO_PREEMPT_PENDING,
+	ADRENO_PREEMPT_COMPLETE,
+};
+
+/**
+ * struct adreno_preemption
+ * @state: The current state of preemption
+ * @counters: Memory descriptor for the memory where the GPU writes the
+ * preemption counters on switch
+ * @timer: A timer to make sure preemption doesn't stall
+ * @work: A work struct for the preemption worker (for 5XX)
+ * @token_submit: Indicates if a preempt token has been submitted in
+ * current ringbuffer (for 4XX)
+ */
+struct adreno_preemption {
+	atomic_t state;
+	struct kgsl_memdesc counters;
+	struct timer_list timer;
+	struct work_struct work;
+	bool token_submit;
+};
+
+
+struct adreno_busy_data {
+	unsigned int gpu_busy;
+	unsigned int vbif_ram_cycles;
+	unsigned int vbif_starved_ram;
+	unsigned int throttle_cycles[ADRENO_GPMU_THROTTLE_COUNTERS];
+};
+
+/**
+ * struct adreno_gpu_core - A specific GPU core definition
+ * @gpurev: Unique GPU revision identifier
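+	/* each per-platform table is expected to end with a zero reg entry */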
+ * @core: Match for the core version of the GPU
+ * @major: Match for the major version of the GPU
+ * @minor: Match for the minor version of the GPU
+ * @patchid: Match for the patch revision of the GPU
+ * @features: Common adreno features supported by this core
+ * @pm4fw_name: Filename for the PM4 firmware
+ * @pfpfw_name: Filename for the PFP firmware
+ * @zap_name: Filename for the Zap Shader ucode
+ * @gpudev: Pointer to the GPU family specific functions for this core
+ * @gmem_size: Amount of binning memory (GMEM/OCMEM) to reserve for the core
+ * @pm4_jt_idx: Index of the jump table in the PM4 microcode
+ * @pm4_jt_addr: Address offset to load the jump table for the PM4 microcode
+ * @pfp_jt_idx: Index of the jump table in the PFP microcode
+ * @pfp_jt_addr: Address offset to load the jump table for the PFP microcode
+ * @pm4_bstrp_size: Size of the bootstrap loader for PM4 microcode
+ * @pfp_bstrp_size: Size of the bootstrap loader for PFP microcode
+ * @pfp_bstrp_ver: Version of the PFP microcode that supports bootstrapping
+ * @shader_offset: Offset of shader from gpu reg base
+ * @shader_size: Shader size
+ * @num_protected_regs: number of protected registers
+ * @gpmufw_name: Filename for the GPMU firmware
+ * @gpmu_major: Match for the GPMU & firmware, major revision
+ * @gpmu_minor: Match for the GPMU & firmware, minor revision
+ * @gpmu_features: Supported features for any given GPMU version
+ * @busy_mask: mask to check if GPU is busy in RBBM_STATUS
+ * @lm_major: Limits Management register sequence, major revision
+ * @lm_minor: LM register sequence, minor revision
+ * @regfw_name: Filename for the register sequence firmware
+ * @gpmu_tsens: ID for the temperature sensor used by the GPMU
+ * @max_power: Max possible power draw of a core, units elephant tail hairs
+ */
+struct adreno_gpu_core {
+	enum adreno_gpurev gpurev;
+	unsigned int core, major, minor, patchid;
+	unsigned long features;
+	const char *pm4fw_name;
+	const char *pfpfw_name;
+	const char *zap_name;
+	struct adreno_gpudev *gpudev;
+	size_t gmem_size;
+	unsigned int pm4_jt_idx;
+	unsigned int pm4_jt_addr;
+	unsigned int pfp_jt_idx;
+	unsigned int pfp_jt_addr;
+	unsigned int pm4_bstrp_size;
+	unsigned int pfp_bstrp_size;
+	unsigned int pfp_bstrp_ver;
+	unsigned long shader_offset;
+	unsigned int shader_size;
+	unsigned int num_protected_regs;
+	const char *gpmufw_name;
+	unsigned int gpmu_major;
+	unsigned int gpmu_minor;
+	unsigned int gpmu_features;
+	unsigned int busy_mask;
+	unsigned int lm_major, lm_minor;
+	const char *regfw_name;
+	unsigned int gpmu_tsens;
+	unsigned int max_power;
+};
+
+/**
+ * struct adreno_device - The mothership structure for all adreno related info
+ * @dev: Reference to struct kgsl_device
+ * @priv: Holds the private flags specific to the adreno_device
+ * @chipid: Chip ID specific to the GPU
+ * @gmem_base: Base physical address of GMEM
+ * @gmem_size: GMEM size
+ * @gpucore: Pointer to the adreno_gpu_core structure
+ * @pfp_fw: Buffer which holds the pfp ucode
+ * @pfp_fw_size: Size of pfp ucode buffer
+ * @pfp_fw_version: Version of pfp ucode
+ * @pfp: Memory descriptor which holds pfp ucode buffer info
+ * @pm4_fw: Buffer which holds the pm4 ucode
+ * @pm4_fw_size: Size of pm4 ucode buffer
+ * @pm4_fw_version: Version of pm4 ucode
+ * @pm4: Memory descriptor which holds pm4 ucode buffer info
+ * @gpmu_cmds_size: Length of gpmu cmd stream
+ * @gpmu_cmds: gpmu cmd stream
+ * @ringbuffers: Array of pointers to adreno_ringbuffers
+ * @num_ringbuffers: Number of ringbuffers for the GPU
+ * @cur_rb: Pointer to the current ringbuffer
+ * @next_rb: Ringbuffer we are switching to during preemption
+ * @prev_rb: Ringbuffer we are switching from during preemption
+ * @fast_hang_detect: Software fault detection availability
+ * @ft_policy: Defines the fault tolerance policy
+ * @long_ib_detect: Long IB detection availability
+ * @ft_pf_policy: Defines the fault policy for page faults
+ * @ocmem_hdl: Handle to the ocmem allocated buffer
+ * @profile: Container for adreno profiler information
+ * @dispatcher: Container for adreno GPU dispatcher
+ * @pwron_fixup: Command buffer to run a post-power collapse shader workaround
+ * @pwron_fixup_dwords: Number of dwords in the command buffer
+ * @input_work: Work struct for turning on the GPU after a touch event
+ * @busy_data: Struct holding GPU VBIF busy stats
+ * @ram_cycles_lo: Number of DDR clock cycles for the monitor session
+ * @starved_ram_lo: Number of cycles VBIF is stalled by DDR
+ * @perfctr_pwr_lo: GPU busy cycles counter used to compute busy time
+ * @halt: Atomic variable to check whether the GPU is currently halted
+ * @ctx_d_debugfs: Context debugfs node
+ * @pwrctrl_flag: Flag to hold adreno specific power attributes
+ * @profile_buffer: Memdesc holding the drawobj profiling buffer
+ * @profile_index: Index to store the start/stop ticks in the profiling
+ * buffer
+ * @sp_local_gpuaddr: Base GPU virtual address for SP local memory
+ * @sp_pvt_gpuaddr: Base GPU virtual address for SP private memory
+ * @lm_fw: The LM firmware handle
+ * @lm_sequence: Pointer to the start of the register write sequence for LM
+ * @lm_size: The dword size of the LM sequence
+ * @lm_limit: limiting value for LM
+ * @lm_threshold_count: register value for counter for lm threshold breakin
+ * @lm_threshold_cross: number of current peaks exceeding threshold
+ * @speed_bin: Indicate which power level set to use
+ * @csdev: Pointer to a coresight device (if applicable)
+ * @gpmu_throttle_counters: Counters for the number of throttled clocks
+ * @irq_storm_work: Worker to handle possible interrupt storms
+ * @active_list: List to track active contexts
+ * @active_list_lock: Lock to protect active_list
+ */
+struct adreno_device {
+	struct kgsl_device dev;    /* Must be first field in this struct */
+	unsigned long priv;
+	unsigned int chipid;
+	unsigned long gmem_base;
+	unsigned long gmem_size;
+	const struct adreno_gpu_core *gpucore;
+	unsigned int *pfp_fw;
+	size_t pfp_fw_size;
+	unsigned int pfp_fw_version;
+	struct kgsl_memdesc pfp;
+	unsigned int *pm4_fw;
+	size_t pm4_fw_size;
+	unsigned int pm4_fw_version;
+	struct kgsl_memdesc pm4;
+	size_t gpmu_cmds_size;
+	unsigned int *gpmu_cmds;
+	struct adreno_ringbuffer ringbuffers[KGSL_PRIORITY_MAX_RB_LEVELS];
+	int num_ringbuffers;
+	struct adreno_ringbuffer *cur_rb;
+	struct adreno_ringbuffer *next_rb;
+	struct adreno_ringbuffer *prev_rb;
+	unsigned int fast_hang_detect;
+	unsigned long ft_policy;
+	unsigned int long_ib_detect;
+	unsigned long ft_pf_policy;
+	struct ocmem_buf *ocmem_hdl;
+	struct adreno_profile profile;
+	struct adreno_dispatcher dispatcher;
+	struct kgsl_memdesc pwron_fixup;
+	unsigned int pwron_fixup_dwords;
+	struct work_struct input_work;
+	struct adreno_busy_data busy_data;
+	unsigned int ram_cycles_lo;
+	unsigned int starved_ram_lo;
+	unsigned int perfctr_pwr_lo;
+	atomic_t halt;
+	struct dentry *ctx_d_debugfs;
+	unsigned long pwrctrl_flag;
+
+	struct kgsl_memdesc profile_buffer;
+	unsigned int profile_index;
+	uint64_t sp_local_gpuaddr;
+	uint64_t sp_pvt_gpuaddr;
+	const struct firmware *lm_fw;
+	uint32_t *lm_sequence;
+	uint32_t lm_size;
+	struct adreno_preemption preempt;
+	struct work_struct gpmu_work;
+	uint32_t lm_leakage;
+	uint32_t lm_limit;
+	uint32_t lm_threshold_count;
+	uint32_t lm_threshold_cross;
+
+	unsigned int speed_bin;
+	unsigned int quirks;
+
+	struct coresight_device *csdev;
+	uint32_t gpmu_throttle_counters[ADRENO_GPMU_THROTTLE_COUNTERS];
+	struct work_struct irq_storm_work;
+
+	struct list_head active_list;
+	spinlock_t active_list_lock;
+};
+
+/**
+ * enum adreno_device_flags - Private flags for the adreno_device
+ * @ADRENO_DEVICE_PWRON - Set during init after a power collapse
+ * @ADRENO_DEVICE_PWRON_FIXUP - Set if the target requires the shader fixup
+ * after power collapse
+ * @ADRENO_DEVICE_CORESIGHT - Set if the coresight (trace bus) registers should
+ * be restored after power collapse
+ * @ADRENO_DEVICE_HANG_INTR - Set if the hang interrupt should be enabled for
+ * this target
+ * @ADRENO_DEVICE_STARTED - Set if the device start sequence is in progress
+ * @ADRENO_DEVICE_FAULT - Set if the device is currently in fault (and shouldn't
+ * send any more commands to the ringbuffer)
+ * @ADRENO_DEVICE_DRAWOBJ_PROFILE - Set if the device supports drawobj
+ * profiling via the ALWAYSON counter
+ * @ADRENO_DEVICE_PREEMPTION - Turn on/off preemption
+ * @ADRENO_DEVICE_SOFT_FAULT_DETECT - Set if soft fault detect is enabled
+ * @ADRENO_DEVICE_GPMU_INITIALIZED - Set if GPMU firmware initialization
+ * succeeded
+ * @ADRENO_DEVICE_ISDB_ENABLED - Set if the Integrated Shader DeBugger is
+ * attached and enabled
+ * @ADRENO_DEVICE_CACHE_FLUSH_TS_SUSPENDED - Set if a CACHE_FLUSH_TS irq storm
+ * is in progress
+ */
+enum adreno_device_flags {
+	ADRENO_DEVICE_PWRON = 0,
+	ADRENO_DEVICE_PWRON_FIXUP = 1,
+	ADRENO_DEVICE_INITIALIZED = 2,
+	ADRENO_DEVICE_CORESIGHT = 3,
+	ADRENO_DEVICE_HANG_INTR = 4,
+	ADRENO_DEVICE_STARTED = 5,
+	ADRENO_DEVICE_FAULT = 6,
+	ADRENO_DEVICE_DRAWOBJ_PROFILE = 7,
+	ADRENO_DEVICE_GPU_REGULATOR_ENABLED = 8,
+	ADRENO_DEVICE_PREEMPTION = 9,
+	ADRENO_DEVICE_SOFT_FAULT_DETECT = 10,
+	ADRENO_DEVICE_GPMU_INITIALIZED = 11,
+	ADRENO_DEVICE_ISDB_ENABLED = 12,
+	ADRENO_DEVICE_CACHE_FLUSH_TS_SUSPENDED = 13,
+};
+
+/**
+ * struct adreno_drawobj_profile_entry - a single drawobj entry in the
+ * kernel profiling buffer
+ * @started: Number of GPU ticks at start of the drawobj
+ * @retired: Number of GPU ticks at the end of the drawobj
+ */
+struct adreno_drawobj_profile_entry {
+	uint64_t started;
+	uint64_t retired;
+};
+
+#define ADRENO_DRAWOBJ_PROFILE_COUNT \
+	(PAGE_SIZE / sizeof(struct adreno_drawobj_profile_entry))
+
+#define ADRENO_DRAWOBJ_PROFILE_OFFSET(_index, _member) \
+	 ((_index) * sizeof(struct adreno_drawobj_profile_entry) \
+	  + offsetof(struct adreno_drawobj_profile_entry, _member))
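+/*
+ * e.g. ADRENO_DRAWOBJ_PROFILE_OFFSET(2, retired) is 2 * 16 + 8 = 40 bytes
+ * into the profiling buffer (each entry holds two 64-bit tick values)
+ */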
+
+
+/**
+ * adreno_regs: List of registers that are used in the kgsl driver for all
+ * 3D devices. Each device type has a different offset value for the same
+ * register, so an array of register offsets is declared for every device
+ * and is indexed by the enumeration values defined in this enum
+ */
+enum adreno_regs {
+	ADRENO_REG_CP_ME_RAM_WADDR,
+	ADRENO_REG_CP_ME_RAM_DATA,
+	ADRENO_REG_CP_PFP_UCODE_DATA,
+	ADRENO_REG_CP_PFP_UCODE_ADDR,
+	ADRENO_REG_CP_WFI_PEND_CTR,
+	ADRENO_REG_CP_RB_BASE,
+	ADRENO_REG_CP_RB_BASE_HI,
+	ADRENO_REG_CP_RB_RPTR_ADDR_LO,
+	ADRENO_REG_CP_RB_RPTR_ADDR_HI,
+	ADRENO_REG_CP_RB_RPTR,
+	ADRENO_REG_CP_RB_WPTR,
+	ADRENO_REG_CP_CNTL,
+	ADRENO_REG_CP_ME_CNTL,
+	ADRENO_REG_CP_RB_CNTL,
+	ADRENO_REG_CP_IB1_BASE,
+	ADRENO_REG_CP_IB1_BASE_HI,
+	ADRENO_REG_CP_IB1_BUFSZ,
+	ADRENO_REG_CP_IB2_BASE,
+	ADRENO_REG_CP_IB2_BASE_HI,
+	ADRENO_REG_CP_IB2_BUFSZ,
+	ADRENO_REG_CP_TIMESTAMP,
+	ADRENO_REG_CP_SCRATCH_REG6,
+	ADRENO_REG_CP_SCRATCH_REG7,
+	ADRENO_REG_CP_ME_RAM_RADDR,
+	ADRENO_REG_CP_ROQ_ADDR,
+	ADRENO_REG_CP_ROQ_DATA,
+	ADRENO_REG_CP_MERCIU_ADDR,
+	ADRENO_REG_CP_MERCIU_DATA,
+	ADRENO_REG_CP_MERCIU_DATA2,
+	ADRENO_REG_CP_MEQ_ADDR,
+	ADRENO_REG_CP_MEQ_DATA,
+	ADRENO_REG_CP_HW_FAULT,
+	ADRENO_REG_CP_PROTECT_STATUS,
+	ADRENO_REG_CP_PREEMPT,
+	ADRENO_REG_CP_PREEMPT_DEBUG,
+	ADRENO_REG_CP_PREEMPT_DISABLE,
+	ADRENO_REG_CP_PROTECT_REG_0,
+	ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_LO,
+	ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_HI,
+	ADRENO_REG_RBBM_STATUS,
+	ADRENO_REG_RBBM_STATUS3,
+	ADRENO_REG_RBBM_PERFCTR_CTL,
+	ADRENO_REG_RBBM_PERFCTR_LOAD_CMD0,
+	ADRENO_REG_RBBM_PERFCTR_LOAD_CMD1,
+	ADRENO_REG_RBBM_PERFCTR_LOAD_CMD2,
+	ADRENO_REG_RBBM_PERFCTR_LOAD_CMD3,
+	ADRENO_REG_RBBM_PERFCTR_PWR_1_LO,
+	ADRENO_REG_RBBM_INT_0_MASK,
+	ADRENO_REG_RBBM_INT_0_STATUS,
+	ADRENO_REG_RBBM_PM_OVERRIDE2,
+	ADRENO_REG_RBBM_INT_CLEAR_CMD,
+	ADRENO_REG_RBBM_SW_RESET_CMD,
+	ADRENO_REG_RBBM_BLOCK_SW_RESET_CMD,
+	ADRENO_REG_RBBM_BLOCK_SW_RESET_CMD2,
+	ADRENO_REG_RBBM_CLOCK_CTL,
+	ADRENO_REG_VPC_DEBUG_RAM_SEL,
+	ADRENO_REG_VPC_DEBUG_RAM_READ,
+	ADRENO_REG_PA_SC_AA_CONFIG,
+	ADRENO_REG_SQ_GPR_MANAGEMENT,
+	ADRENO_REG_SQ_INST_STORE_MANAGEMENT,
+	ADRENO_REG_TP0_CHICKEN,
+	ADRENO_REG_RBBM_RBBM_CTL,
+	ADRENO_REG_UCHE_INVALIDATE0,
+	ADRENO_REG_UCHE_INVALIDATE1,
+	ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_LO,
+	ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_HI,
+	ADRENO_REG_RBBM_SECVID_TRUST_CONTROL,
+	ADRENO_REG_RBBM_ALWAYSON_COUNTER_LO,
+	ADRENO_REG_RBBM_ALWAYSON_COUNTER_HI,
+	ADRENO_REG_RBBM_SECVID_TRUST_CONFIG,
+	ADRENO_REG_RBBM_SECVID_TSB_CONTROL,
+	ADRENO_REG_RBBM_SECVID_TSB_TRUSTED_BASE,
+	ADRENO_REG_RBBM_SECVID_TSB_TRUSTED_BASE_HI,
+	ADRENO_REG_RBBM_SECVID_TSB_TRUSTED_SIZE,
+	ADRENO_REG_VBIF_XIN_HALT_CTRL0,
+	ADRENO_REG_VBIF_XIN_HALT_CTRL1,
+	ADRENO_REG_VBIF_VERSION,
+	ADRENO_REG_REGISTER_MAX,
+};
+
+enum adreno_int_bits {
+	ADRENO_INT_RBBM_AHB_ERROR,
+	ADRENO_INT_BITS_MAX,
+};
+
+/**
+ * adreno_reg_offsets: Holds array of register offsets
+ * @offsets: Offset array of size defined by enum adreno_regs
+ * @offset_0: Index of the register in the offset array whose value is
+ * legitimately 0. Since 0 is a valid register offset, this is needed during
+ * initialization of the offset array to tell a correctly defined offset of 0
+ * from an undefined one
+ */
+struct adreno_reg_offsets {
+	unsigned int *const offsets;
+	enum adreno_regs offset_0;
+};
+
+#define ADRENO_REG_UNUSED	0xFFFFFFFF
+#define ADRENO_REG_SKIP	0xFFFFFFFE
+#define ADRENO_REG_DEFINE(_offset, _reg) [_offset] = _reg
+#define ADRENO_INT_DEFINE(_offset, _val) ADRENO_REG_DEFINE(_offset, _val)
+
+/*
+ * struct adreno_vbif_data - Describes vbif register value pair
+ * @reg: Offset to vbif register
+ * @val: The value that should be programmed in the register at reg
+ */
+struct adreno_vbif_data {
+	unsigned int reg;
+	unsigned int val;
+};
+
+/*
+ * struct adreno_vbif_platform - Holds an array of vbif reg value pairs
+ * for a particular core
+ * @devfunc: Pointer to platform/core identification function
+ * @vbif: Array of reg value pairs for vbif registers
+ */
+struct adreno_vbif_platform {
+	int (*devfunc)(struct adreno_device *);
+	const struct adreno_vbif_data *vbif;
+};
+
+/*
+ * struct adreno_vbif_snapshot_registers - Holds an array of vbif registers
+ * listed for snapshot dump for a particular core
+ * @version: vbif version
+ * @mask: vbif revision mask
+ * @registers: vbif registers listed for snapshot dump
+ * @count: count of vbif registers listed for snapshot
+ */
+struct adreno_vbif_snapshot_registers {
+	const unsigned int version;
+	const unsigned int mask;
+	const unsigned int *registers;
+	const int count;
+};
+
+/**
+ * struct adreno_coresight_register - Definition for a coresight (tracebus)
+ * debug register
+ * @offset: Offset of the debug register in the KGSL mmio region
+ * @initial: Default value to write when coresight is enabled
+ * @value: Current shadow value of the register (to be reprogrammed after power
+ * collapse)
+ */
+struct adreno_coresight_register {
+	unsigned int offset;
+	unsigned int initial;
+	unsigned int value;
+};
+
+struct adreno_coresight_attr {
+	struct device_attribute attr;
+	struct adreno_coresight_register *reg;
+};
+
+ssize_t adreno_coresight_show_register(struct device *device,
+		struct device_attribute *attr, char *buf);
+
+ssize_t adreno_coresight_store_register(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t size);
+
+#define ADRENO_CORESIGHT_ATTR(_attrname, _reg) \
+	struct adreno_coresight_attr coresight_attr_##_attrname  = { \
+		__ATTR(_attrname, 0644, \
+		adreno_coresight_show_register, \
+		adreno_coresight_store_register), \
+		(_reg), }
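+/*
+ * Illustrative use (names are hypothetical):
+ *	static ADRENO_CORESIGHT_ATTR(cfg_debbus_ctrl, &coresight_regs[0]);
+ * defines a sysfs attribute named coresight_attr_cfg_debbus_ctrl backed by
+ * the given coresight register.
+ */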
+
+/**
+ * struct adreno_coresight - GPU specific coresight definition
+ * @registers - Array of GPU specific registers to configure trace bus output
+ * @count - Number of registers in the array
+ * @groups - Pointer to an attribute list of control files
+ * @atid - The unique ATID value of the coresight device
+ */
+struct adreno_coresight {
+	struct adreno_coresight_register *registers;
+	unsigned int count;
+	const struct attribute_group **groups;
+	unsigned int atid;
+};
+
+
+struct adreno_irq_funcs {
+	void (*func)(struct adreno_device *, int);
+};
+#define ADRENO_IRQ_CALLBACK(_c) { .func = _c }
+
+struct adreno_irq {
+	unsigned int mask;
+	struct adreno_irq_funcs *funcs;
+};
+
+/*
+ * struct adreno_debugbus_block - Holds info about debug buses of a chip
+ * @block_id: Bus identifier
+ * @dwords: Number of dwords of data that this block holds
+ */
+struct adreno_debugbus_block {
+	unsigned int block_id;
+	unsigned int dwords;
+};
+
+/*
+ * struct adreno_snapshot_section_sizes - Structure holding the size of
+ * different sections dumped during device snapshot
+ * @cp_pfp: CP PFP data section size
+ * @cp_me: CP ME data section size
+ * @vpc_mem: VPC memory section size
+ * @cp_meq: CP MEQ size
+ * @shader_mem: Size of shader memory of 1 shader section
+ * @cp_merciu: CP MERCIU size
+ * @roq: ROQ size
+ */
+struct adreno_snapshot_sizes {
+	int cp_pfp;
+	int cp_me;
+	int vpc_mem;
+	int cp_meq;
+	int shader_mem;
+	int cp_merciu;
+	int roq;
+};
+
+/*
+ * struct adreno_snapshot_data - Holds data used in snapshot
+ * @sect_sizes: Has sections sizes
+ */
+struct adreno_snapshot_data {
+	struct adreno_snapshot_sizes *sect_sizes;
+};
+
+struct adreno_gpudev {
+	/*
+	 * These registers are in a different location on different devices,
+	 * so define them in the structure and use them as variables.
+	 */
+	const struct adreno_reg_offsets *reg_offsets;
+	unsigned int *const int_bits;
+	const struct adreno_ft_perf_counters *ft_perf_counters;
+	unsigned int ft_perf_counters_count;
+
+	struct adreno_perfcounters *perfcounters;
+	const struct adreno_invalid_countables *invalid_countables;
+	struct adreno_snapshot_data *snapshot_data;
+
+	struct adreno_coresight *coresight;
+
+	struct adreno_irq *irq;
+	int num_prio_levels;
+	unsigned int vbif_xin_halt_ctrl0_mask;
+	/* GPU specific function hooks */
+	void (*irq_trace)(struct adreno_device *, unsigned int status);
+	void (*snapshot)(struct adreno_device *, struct kgsl_snapshot *);
+	void (*platform_setup)(struct adreno_device *);
+	void (*init)(struct adreno_device *);
+	void (*remove)(struct adreno_device *);
+	int (*rb_start)(struct adreno_device *, unsigned int start_type);
+	int (*microcode_read)(struct adreno_device *);
+	void (*perfcounter_init)(struct adreno_device *);
+	void (*perfcounter_close)(struct adreno_device *);
+	void (*start)(struct adreno_device *);
+	bool (*is_sptp_idle)(struct adreno_device *);
+	int (*regulator_enable)(struct adreno_device *);
+	void (*regulator_disable)(struct adreno_device *);
+	void (*pwrlevel_change_settings)(struct adreno_device *,
+				unsigned int prelevel, unsigned int postlevel,
+				bool post);
+	uint64_t (*read_throttling_counters)(struct adreno_device *);
+	void (*count_throttles)(struct adreno_device *, uint64_t adj);
+	int (*enable_pwr_counters)(struct adreno_device *,
+				unsigned int counter);
+	unsigned int (*preemption_pre_ibsubmit)(
+				struct adreno_device *adreno_dev,
+				struct adreno_ringbuffer *rb,
+				unsigned int *cmds,
+				struct kgsl_context *context);
+	int (*preemption_yield_enable)(unsigned int *);
+	unsigned int (*preemption_post_ibsubmit)(
+				struct adreno_device *adreno_dev,
+				unsigned int *cmds);
+	int (*preemption_init)(struct adreno_device *);
+	void (*preemption_schedule)(struct adreno_device *);
+	void (*enable_64bit)(struct adreno_device *);
+	void (*clk_set_options)(struct adreno_device *,
+				const char *, struct clk *);
+};
+
+/**
+ * enum kgsl_ft_policy_bits - KGSL fault tolerance policy bits
+ * @KGSL_FT_OFF: Disable fault detection (not used)
+ * @KGSL_FT_REPLAY: Replay the faulting command
+ * @KGSL_FT_SKIPIB: Skip the faulting indirect buffer
+ * @KGSL_FT_SKIPFRAME: Skip the frame containing the faulting IB
+ * @KGSL_FT_DISABLE: Tells the dispatcher to disable FT for the command obj
+ * @KGSL_FT_TEMP_DISABLE: Disables FT for all commands
+ * @KGSL_FT_THROTTLE: Disable the context if it faults too often
+ * @KGSL_FT_SKIPCMD: Skip the command containing the faulting IB
+ */
+enum kgsl_ft_policy_bits {
+	KGSL_FT_OFF = 0,
+	KGSL_FT_REPLAY = 1,
+	KGSL_FT_SKIPIB = 2,
+	KGSL_FT_SKIPFRAME = 3,
+	KGSL_FT_DISABLE = 4,
+	KGSL_FT_TEMP_DISABLE = 5,
+	KGSL_FT_THROTTLE = 6,
+	KGSL_FT_SKIPCMD = 7,
+	/* KGSL_FT_MAX_BITS is used to calculate the mask */
+	KGSL_FT_MAX_BITS,
+	/* Internal bits - set during GFT */
+	/* Skip the PM dump on replayed command obj's */
+	KGSL_FT_SKIP_PMDUMP = 31,
+};
+
+#define KGSL_FT_POLICY_MASK GENMASK(KGSL_FT_MAX_BITS - 1, 0)
+
+#define  KGSL_FT_DEFAULT_POLICY \
+	(BIT(KGSL_FT_REPLAY) | \
+	 BIT(KGSL_FT_SKIPCMD) | \
+	 BIT(KGSL_FT_THROTTLE))
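+/* i.e. a default policy value of 0xc2 (replay | throttle | skipcmd) */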
+
+#define ADRENO_FT_TYPES \
+	{ BIT(KGSL_FT_OFF), "off" }, \
+	{ BIT(KGSL_FT_REPLAY), "replay" }, \
+	{ BIT(KGSL_FT_SKIPIB), "skipib" }, \
+	{ BIT(KGSL_FT_SKIPFRAME), "skipframe" }, \
+	{ BIT(KGSL_FT_DISABLE), "disable" }, \
+	{ BIT(KGSL_FT_TEMP_DISABLE), "temp" }, \
+	{ BIT(KGSL_FT_THROTTLE), "throttle"}, \
+	{ BIT(KGSL_FT_SKIPCMD), "skipcmd" }
+
+/**
+ * enum kgsl_ft_pagefault_policy_bits - KGSL pagefault policy bits
+ * @KGSL_FT_PAGEFAULT_INT_ENABLE: No longer used, but retained for compatibility
+ * @KGSL_FT_PAGEFAULT_GPUHALT_ENABLE: enable GPU halt on pagefaults
+ * @KGSL_FT_PAGEFAULT_LOG_ONE_PER_PAGE: log one pagefault per page
+ * @KGSL_FT_PAGEFAULT_LOG_ONE_PER_INT: log one pagefault per interrupt
+ */
+enum {
+	KGSL_FT_PAGEFAULT_INT_ENABLE = 0,
+	KGSL_FT_PAGEFAULT_GPUHALT_ENABLE = 1,
+	KGSL_FT_PAGEFAULT_LOG_ONE_PER_PAGE = 2,
+	KGSL_FT_PAGEFAULT_LOG_ONE_PER_INT = 3,
+	/* KGSL_FT_PAGEFAULT_MAX_BITS is used to calculate the mask */
+	KGSL_FT_PAGEFAULT_MAX_BITS,
+};
+
+#define KGSL_FT_PAGEFAULT_MASK GENMASK(KGSL_FT_PAGEFAULT_MAX_BITS - 1, 0)
+
+#define KGSL_FT_PAGEFAULT_DEFAULT_POLICY 0
+
+#define FOR_EACH_RINGBUFFER(_dev, _rb, _i)			\
+	for ((_i) = 0, (_rb) = &((_dev)->ringbuffers[0]);	\
+		(_i) < (_dev)->num_ringbuffers;			\
+		(_i)++, (_rb)++)
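+/*
+ * Illustrative use:
+ *	struct adreno_ringbuffer *rb;
+ *	int i;
+ *
+ *	FOR_EACH_RINGBUFFER(adreno_dev, rb, i)
+ *		do_something_with(rb);
+ */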
+
+struct adreno_ft_perf_counters {
+	unsigned int counter;
+	unsigned int countable;
+};
+
+extern unsigned int *adreno_ft_regs;
+extern unsigned int adreno_ft_regs_num;
+extern unsigned int *adreno_ft_regs_val;
+
+extern struct adreno_gpudev adreno_a3xx_gpudev;
+extern struct adreno_gpudev adreno_a4xx_gpudev;
+extern struct adreno_gpudev adreno_a5xx_gpudev;
+
+extern int adreno_wake_nice;
+extern unsigned int adreno_wake_timeout;
+
+long adreno_ioctl(struct kgsl_device_private *dev_priv,
+		unsigned int cmd, unsigned long arg);
+
+long adreno_ioctl_helper(struct kgsl_device_private *dev_priv,
+		unsigned int cmd, unsigned long arg,
+		const struct kgsl_ioctl *cmds, int len);
+
+int adreno_spin_idle(struct adreno_device *device, unsigned int timeout);
+int adreno_idle(struct kgsl_device *device);
+bool adreno_isidle(struct kgsl_device *device);
+
+int adreno_set_constraint(struct kgsl_device *device,
+				struct kgsl_context *context,
+				struct kgsl_device_constraint *constraint);
+
+void adreno_shadermem_regread(struct kgsl_device *device,
+						unsigned int offsetwords,
+						unsigned int *value);
+
+void adreno_snapshot(struct kgsl_device *device,
+		struct kgsl_snapshot *snapshot,
+		struct kgsl_context *context);
+
+int adreno_reset(struct kgsl_device *device, int fault);
+
+void adreno_fault_skipcmd_detached(struct adreno_device *adreno_dev,
+					 struct adreno_context *drawctxt,
+					 struct kgsl_drawobj *drawobj);
+
+int adreno_coresight_init(struct adreno_device *adreno_dev);
+
+void adreno_coresight_start(struct adreno_device *adreno_dev);
+void adreno_coresight_stop(struct adreno_device *adreno_dev);
+
+void adreno_coresight_remove(struct adreno_device *adreno_dev);
+
+bool adreno_hw_isidle(struct adreno_device *adreno_dev);
+
+void adreno_fault_detect_start(struct adreno_device *adreno_dev);
+void adreno_fault_detect_stop(struct adreno_device *adreno_dev);
+
+void adreno_hang_int_callback(struct adreno_device *adreno_dev, int bit);
+void adreno_cp_callback(struct adreno_device *adreno_dev, int bit);
+
+int adreno_sysfs_init(struct adreno_device *adreno_dev);
+void adreno_sysfs_close(struct adreno_device *adreno_dev);
+
+void adreno_irqctrl(struct adreno_device *adreno_dev, int state);
+
+long adreno_ioctl_perfcounter_get(struct kgsl_device_private *dev_priv,
+	unsigned int cmd, void *data);
+
+long adreno_ioctl_perfcounter_put(struct kgsl_device_private *dev_priv,
+	unsigned int cmd, void *data);
+
+int adreno_efuse_map(struct adreno_device *adreno_dev);
+int adreno_efuse_read_u32(struct adreno_device *adreno_dev, unsigned int offset,
+		unsigned int *val);
+void adreno_efuse_unmap(struct adreno_device *adreno_dev);
+
+#define ADRENO_TARGET(_name, _id) \
+static inline int adreno_is_##_name(struct adreno_device *adreno_dev) \
+{ \
+	return (ADRENO_GPUREV(adreno_dev) == (_id)); \
+}
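+/*
+ * e.g. ADRENO_TARGET(a304, ADRENO_REV_A304) below expands to a static inline
+ * adreno_is_a304() helper
+ */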
+
+static inline int adreno_is_a3xx(struct adreno_device *adreno_dev)
+{
+	return ((ADRENO_GPUREV(adreno_dev) >= 300) &&
+		(ADRENO_GPUREV(adreno_dev) < 400));
+}
+
+ADRENO_TARGET(a304, ADRENO_REV_A304)
+ADRENO_TARGET(a305, ADRENO_REV_A305)
+ADRENO_TARGET(a305b, ADRENO_REV_A305B)
+ADRENO_TARGET(a305c, ADRENO_REV_A305C)
+ADRENO_TARGET(a306, ADRENO_REV_A306)
+ADRENO_TARGET(a306a, ADRENO_REV_A306A)
+ADRENO_TARGET(a310, ADRENO_REV_A310)
+ADRENO_TARGET(a320, ADRENO_REV_A320)
+ADRENO_TARGET(a330, ADRENO_REV_A330)
+
+static inline int adreno_is_a330v2(struct adreno_device *adreno_dev)
+{
+	return ((ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A330) &&
+		(ADRENO_CHIPID_PATCH(adreno_dev->chipid) > 0));
+}
+
+static inline int adreno_is_a330v21(struct adreno_device *adreno_dev)
+{
+	return ((ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A330) &&
+		(ADRENO_CHIPID_PATCH(adreno_dev->chipid) > 0xF));
+}
+
+static inline int adreno_is_a4xx(struct adreno_device *adreno_dev)
+{
+	return ADRENO_GPUREV(adreno_dev) >= 400 &&
+		ADRENO_GPUREV(adreno_dev) < 500;
+}
+
+ADRENO_TARGET(a405, ADRENO_REV_A405);
+
+static inline int adreno_is_a405v2(struct adreno_device *adreno_dev)
+{
+	return (ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A405) &&
+		(ADRENO_CHIPID_PATCH(adreno_dev->chipid) == 0x10);
+}
+
+ADRENO_TARGET(a418, ADRENO_REV_A418)
+ADRENO_TARGET(a420, ADRENO_REV_A420)
+ADRENO_TARGET(a430, ADRENO_REV_A430)
+
+static inline int adreno_is_a430v2(struct adreno_device *adreno_dev)
+{
+	return ((ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A430) &&
+		(ADRENO_CHIPID_PATCH(adreno_dev->chipid) == 1));
+}
+
+static inline int adreno_is_a5xx(struct adreno_device *adreno_dev)
+{
+	return ADRENO_GPUREV(adreno_dev) >= 500 &&
+			ADRENO_GPUREV(adreno_dev) < 600;
+}
+
+ADRENO_TARGET(a505, ADRENO_REV_A505)
+ADRENO_TARGET(a506, ADRENO_REV_A506)
+ADRENO_TARGET(a510, ADRENO_REV_A510)
+ADRENO_TARGET(a512, ADRENO_REV_A512)
+ADRENO_TARGET(a530, ADRENO_REV_A530)
+ADRENO_TARGET(a540, ADRENO_REV_A540)
+
+static inline int adreno_is_a530v1(struct adreno_device *adreno_dev)
+{
+	return (ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A530) &&
+		(ADRENO_CHIPID_PATCH(adreno_dev->chipid) == 0);
+}
+
+static inline int adreno_is_a530v2(struct adreno_device *adreno_dev)
+{
+	return (ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A530) &&
+		(ADRENO_CHIPID_PATCH(adreno_dev->chipid) == 1);
+}
+
+static inline int adreno_is_a530v3(struct adreno_device *adreno_dev)
+{
+	return (ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A530) &&
+		(ADRENO_CHIPID_PATCH(adreno_dev->chipid) == 2);
+}
+
+static inline int adreno_is_a505_or_a506(struct adreno_device *adreno_dev)
+{
+	return ADRENO_GPUREV(adreno_dev) >= 505 &&
+			ADRENO_GPUREV(adreno_dev) <= 506;
+}
+
+static inline int adreno_is_a540v1(struct adreno_device *adreno_dev)
+{
+	return (ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A540) &&
+		(ADRENO_CHIPID_PATCH(adreno_dev->chipid) == 0);
+}
+
+static inline int adreno_is_a540v2(struct adreno_device *adreno_dev)
+{
+	return (ADRENO_GPUREV(adreno_dev) == ADRENO_REV_A540) &&
+		(ADRENO_CHIPID_PATCH(adreno_dev->chipid) == 1);
+}
+
+/*
+ * adreno_checkreg_off() - Checks the validity of a register enum
+ * @adreno_dev:		Pointer to adreno device
+ * @offset_name:	The register enum that is checked
+ */
+static inline bool adreno_checkreg_off(struct adreno_device *adreno_dev,
+					enum adreno_regs offset_name)
+{
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+
+	if (offset_name >= ADRENO_REG_REGISTER_MAX ||
+		gpudev->reg_offsets->offsets[offset_name] == ADRENO_REG_UNUSED)
+		return false;
+
+	/*
+	 * GPU register programming is kept as common as possible across the
+	 * cores; use ADRENO_REG_SKIP when certain register programming needs
+	 * to be skipped for certain GPU cores.
+	 * Example: registers such as IB1_BASE are 64 bit on a5xx. The common
+	 * code programs the full 64 bit register, but the upper 32 bits are
+	 * skipped on a4xx and a3xx by marking them ADRENO_REG_SKIP.
+	 */
+	if (gpudev->reg_offsets->offsets[offset_name] == ADRENO_REG_SKIP)
+		return false;
+
+	return true;
+}
+
+/*
+ * adreno_readreg() - Read a register by getting its offset from the
+ * offset array defined in the gpudev node
+ * @adreno_dev:		Pointer to the adreno device
+ * @offset_name:	The register enum that is to be read
+ * @val:		Register value read is placed here
+ */
+static inline void adreno_readreg(struct adreno_device *adreno_dev,
+				enum adreno_regs offset_name, unsigned int *val)
+{
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+
+	if (adreno_checkreg_off(adreno_dev, offset_name))
+		kgsl_regread(KGSL_DEVICE(adreno_dev),
+				gpudev->reg_offsets->offsets[offset_name], val);
+	else
+		*val = 0;
+}
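+/*
+ * Illustrative use:
+ *	unsigned int status;
+ *
+ *	adreno_readreg(adreno_dev, ADRENO_REG_RBBM_STATUS, &status);
+ */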
+
+/*
+ * adreno_writereg() - Write a register by getting its offset from the
+ * offset array defined in the gpudev node
+ * @adreno_dev:		Pointer to the adreno device
+ * @offset_name:	The register enum that is to be written
+ * @val:		Value to write
+ */
+static inline void adreno_writereg(struct adreno_device *adreno_dev,
+				enum adreno_regs offset_name, unsigned int val)
+{
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+
+	if (adreno_checkreg_off(adreno_dev, offset_name))
+		kgsl_regwrite(KGSL_DEVICE(adreno_dev),
+				gpudev->reg_offsets->offsets[offset_name], val);
+}
+
+/*
+ * adreno_getreg() - Returns the offset value of a register from the
+ * register offset array in the gpudev node
+ * @adreno_dev:		Pointer to the adreno device
+ * @offset_name:	The register enum whose offset is returned
+ */
+static inline unsigned int adreno_getreg(struct adreno_device *adreno_dev,
+				enum adreno_regs offset_name)
+{
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+
+	if (!adreno_checkreg_off(adreno_dev, offset_name))
+		return ADRENO_REG_REGISTER_MAX;
+	return gpudev->reg_offsets->offsets[offset_name];
+}
+
+/*
+ * adreno_get_int() - Returns the offset value of an interrupt bit from
+ * the interrupt bit array in the gpudev node
+ * @adreno_dev:		Pointer to the adreno device
+ * @bit_name:		The interrupt bit enum whose bit is returned
+ */
+static inline unsigned int adreno_get_int(struct adreno_device *adreno_dev,
+				enum adreno_int_bits bit_name)
+{
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+
+	if (bit_name >= ADRENO_INT_BITS_MAX)
+		return -ERANGE;
+
+	return gpudev->int_bits[bit_name];
+}
+
+/**
+ * adreno_gpu_fault() - Return the current state of the GPU
+ * @adreno_dev: A pointer to the adreno_device to query
+ *
+ * Return 0 if there is no fault, or a positive value indicating the type of
+ * the last fault that occurred
+ */
+static inline unsigned int adreno_gpu_fault(struct adreno_device *adreno_dev)
+{
+	/* make sure we're reading the latest value */
+	smp_rmb();
+	return atomic_read(&adreno_dev->dispatcher.fault);
+}
+
+/**
+ * adreno_set_gpu_fault() - Set the current fault status of the GPU
+ * @adreno_dev: A pointer to the adreno_device to set
+ * @state: fault state to set
+ *
+ */
+static inline void adreno_set_gpu_fault(struct adreno_device *adreno_dev,
+	int state)
+{
+	/* only set the fault bit w/o overwriting other bits */
+	atomic_add(state, &adreno_dev->dispatcher.fault);
+
+	/* make sure other CPUs see the update */
+	smp_wmb();
+}
+
+
+/**
+ * adreno_clear_gpu_fault() - Clear the GPU fault register
+ * @adreno_dev: A pointer to an adreno_device structure
+ *
+ * Clear the GPU fault status for the adreno device
+ */
+
+static inline void adreno_clear_gpu_fault(struct adreno_device *adreno_dev)
+{
+	atomic_set(&adreno_dev->dispatcher.fault, 0);
+
+	/* make sure other CPUs see the update */
+	smp_wmb();
+}
+
+/**
+ * adreno_gpu_halt() - Return the GPU halt refcount
+ * @adreno_dev: A pointer to the adreno_device
+ */
+static inline int adreno_gpu_halt(struct adreno_device *adreno_dev)
+{
+	/* make sure we're reading the latest value */
+	smp_rmb();
+	return atomic_read(&adreno_dev->halt);
+}
+
+
+/**
+ * adreno_clear_gpu_halt() - Clear the GPU halt refcount
+ * @adreno_dev: A pointer to the adreno_device
+ */
+static inline void adreno_clear_gpu_halt(struct adreno_device *adreno_dev)
+{
+	atomic_set(&adreno_dev->halt, 0);
+
+	/* make sure other CPUs see the update */
+	smp_wmb();
+}
+
+/**
+ * adreno_get_gpu_halt() - Increment GPU halt refcount
+ * @adreno_dev: A pointer to the adreno_device
+ */
+static inline void adreno_get_gpu_halt(struct adreno_device *adreno_dev)
+{
+	atomic_inc(&adreno_dev->halt);
+}
+
+/**
+ * adreno_put_gpu_halt() - Decrement GPU halt refcount
+ * @adreno_dev: A pointer to the adreno_device
+ */
+static inline void adreno_put_gpu_halt(struct adreno_device *adreno_dev)
+{
+	/* Make sure the refcount is good */
+	int ret = atomic_dec_if_positive(&adreno_dev->halt);
+
+	WARN(ret < 0, "GPU halt refcount unbalanced\n");
+}
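+
+/*
+ * Illustrative usage (not part of the driver): the halt counter is a simple
+ * refcount, so every get must be balanced by a put once the work that
+ * required the GPU to stay halted is done.
+ *
+ *	adreno_get_gpu_halt(adreno_dev);
+ *	... perform work that requires new submissions to be held off ...
+ *	adreno_put_gpu_halt(adreno_dev);
+ */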
+
+
+/*
+ * adreno_vbif_start() - Program VBIF registers, called in device start
+ * @adreno_dev: Pointer to device whose vbif data is to be programmed
+ * @vbif_platforms: List of register/value pairs of VBIF settings for a family
+ * of adreno cores
+ * @num_platforms: Number of platforms contained in vbif_platforms
+ */
+static inline void adreno_vbif_start(struct adreno_device *adreno_dev,
+			const struct adreno_vbif_platform *vbif_platforms,
+			int num_platforms)
+{
+	int i;
+	const struct adreno_vbif_data *vbif = NULL;
+
+	for (i = 0; i < num_platforms; i++) {
+		if (vbif_platforms[i].devfunc(adreno_dev)) {
+			vbif = vbif_platforms[i].vbif;
+			break;
+		}
+	}
+
+	while ((vbif != NULL) && (vbif->reg != 0)) {
+		kgsl_regwrite(KGSL_DEVICE(adreno_dev), vbif->reg, vbif->val);
+		vbif++;
+	}
+}
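+
+/*
+ * Illustrative usage (not part of the driver): each per-core table is a
+ * sentinel-terminated list of { register, value } pairs, matched against the
+ * running core by its devfunc callback.  The register value below is only a
+ * placeholder.
+ *
+ *	static const struct adreno_vbif_data example_vbif[] = {
+ *		{ A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003 },
+ *		{ 0, 0 },
+ *	};
+ *
+ *	static const struct adreno_vbif_platform example_platforms[] = {
+ *		{ adreno_is_a304, example_vbif },
+ *	};
+ *
+ *	adreno_vbif_start(adreno_dev, example_platforms,
+ *			ARRAY_SIZE(example_platforms));
+ */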
+
+/**
+ * adreno_set_protected_registers() - Protect the specified range of registers
+ * from being accessed by the GPU
+ * @adreno_dev: pointer to the Adreno device
+ * @index: Pointer to the index of the protect mode register to write to
+ * @reg: Starting dword register to write
+ * @mask_len: Size of the mask to protect (# of registers = 2 ** mask_len)
+ *
+ * Add the range of registers to the list of protected mode registers that will
+ * cause an exception if the GPU accesses them.  There are 16 protected mode
+ * register slots by default (cores may override this via num_protected_regs).
+ * Index is used to specify which register to write to - the intent is to call
+ * this function multiple times with the same index pointer for each range so
+ * that the protect registers are programmed incrementally.
+ */
+static inline void adreno_set_protected_registers(
+		struct adreno_device *adreno_dev, unsigned int *index,
+		unsigned int reg, int mask_len)
+{
+	unsigned int val;
+	unsigned int base =
+		adreno_getreg(adreno_dev, ADRENO_REG_CP_PROTECT_REG_0);
+	unsigned int offset = *index;
+	unsigned int max_slots = adreno_dev->gpucore->num_protected_regs ?
+				adreno_dev->gpucore->num_protected_regs : 16;
+
+	/* Do we have a free slot? */
+	if (WARN(*index >= max_slots, "Protected register slots full: %d/%d\n",
+					*index, max_slots))
+		return;
+
+	/*
+	 * On A4XX targets with more than 16 protected mode registers
+	 * the upper registers are not contiguous with the lower 16
+	 * registers so we have to adjust the base and offset accordingly
+	 */
+
+	if (adreno_is_a4xx(adreno_dev) && *index >= 0x10) {
+		base = A4XX_CP_PROTECT_REG_10;
+		offset = *index - 0x10;
+	}
+
+	val = 0x60000000 | ((mask_len & 0x1F) << 24) | ((reg << 2) & 0xFFFFF);
+
+	kgsl_regwrite(KGSL_DEVICE(adreno_dev), base + offset, val);
+	*index = *index + 1;
+}
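+
+/*
+ * Illustrative usage (not part of the driver): protecting two register
+ * ranges with a shared slot index, in the same way the per-core protect_init
+ * callbacks later in this file do.
+ *
+ *	unsigned int index = 0;
+ *
+ *	adreno_set_protected_registers(adreno_dev, &index, 0x18, 0);
+ *	adreno_set_protected_registers(adreno_dev, &index, 0x20, 2);
+ */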
+
+#ifdef CONFIG_DEBUG_FS
+void adreno_debugfs_init(struct adreno_device *adreno_dev);
+void adreno_context_debugfs_init(struct adreno_device *adreno_dev,
+				struct adreno_context *ctx);
+#else
+static inline void adreno_debugfs_init(struct adreno_device *adreno_dev) { }
+static inline void adreno_context_debugfs_init(struct adreno_device *device,
+						struct adreno_context *context)
+						{ }
+#endif
+
+/**
+ * adreno_compare_pm4_version() - Compare the PM4 microcode version
+ * @adreno_dev: Pointer to the adreno_device struct
+ * @version: Version number to compare against
+ *
+ * Compare the current version against the specified version and return -1 if
+ * the current code is older, 0 if equal or 1 if newer.
+ */
+static inline int adreno_compare_pm4_version(struct adreno_device *adreno_dev,
+	unsigned int version)
+{
+	if (adreno_dev->pm4_fw_version == version)
+		return 0;
+
+	return (adreno_dev->pm4_fw_version > version) ? 1 : -1;
+}
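+
+/*
+ * Illustrative usage (not part of the driver): requiring a minimum PM4
+ * microcode version before enabling an optional feature.  The version value
+ * below is a placeholder.
+ *
+ *	if (adreno_compare_pm4_version(adreno_dev, 0x00190000) >= 0)
+ *		... microcode is at least version 0x00190000 ...
+ */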
+
+/**
+ * adreno_compare_pfp_version() - Compare the PFP microcode version
+ * @adreno_dev: Pointer to the adreno_device struct
+ * @version: Version number to compare against
+ *
+ * Compare the current version against the specified version and return -1 if
+ * the current code is older, 0 if equal or 1 if newer.
+ */
+static inline int adreno_compare_pfp_version(struct adreno_device *adreno_dev,
+	unsigned int version)
+{
+	if (adreno_dev->pfp_fw_version == version)
+		return 0;
+
+	return (adreno_dev->pfp_fw_version > version) ? 1 : -1;
+}
+
+/*
+ * adreno_bootstrap_ucode() - Checks if Ucode bootstrapping is supported
+ * @adreno_dev:		Pointer to the adreno device
+ */
+static inline int adreno_bootstrap_ucode(struct adreno_device *adreno_dev)
+{
+	return (ADRENO_FEATURE(adreno_dev, ADRENO_USE_BOOTSTRAP) &&
+		adreno_compare_pfp_version(adreno_dev,
+			adreno_dev->gpucore->pfp_bstrp_ver) >= 0) ? 1 : 0;
+}
+
+/**
+ * adreno_in_preempt_state() - Check if the preemption state equals @state
+ * @adreno_dev: Device whose preemption state is checked
+ * @state: State to compare against
+ */
+static inline bool adreno_in_preempt_state(struct adreno_device *adreno_dev,
+			enum adreno_preempt_states state)
+{
+	return atomic_read(&adreno_dev->preempt.state) == state;
+}
+/**
+ * adreno_set_preempt_state() - Set the specified preemption state
+ * @adreno_dev: Device to change preemption state
+ * @state: State to set
+ */
+static inline void adreno_set_preempt_state(struct adreno_device *adreno_dev,
+		enum adreno_preempt_states state)
+{
+	/*
+	 * atomic_set doesn't use barriers, so we need to do it ourselves.  One
+	 * before...
+	 */
+	smp_wmb();
+	atomic_set(&adreno_dev->preempt.state, state);
+
+	/* ... and one after */
+	smp_wmb();
+}
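+
+/*
+ * Illustrative usage (not part of the driver): a typical transition guarded
+ * by a state check.  ADRENO_PREEMPT_NONE and ADRENO_PREEMPT_START are assumed
+ * to be members of enum adreno_preempt_states.
+ *
+ *	if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE))
+ *		adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_START);
+ */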
+
+static inline bool adreno_is_preemption_enabled(
+				struct adreno_device *adreno_dev)
+{
+	return test_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv);
+}
+/**
+ * adreno_ctx_get_rb() - Return the ringbuffer that a context should
+ * use based on priority
+ * @adreno_dev: The adreno device that context is using
+ * @drawctxt: The context pointer
+ */
+static inline struct adreno_ringbuffer *adreno_ctx_get_rb(
+				struct adreno_device *adreno_dev,
+				struct adreno_context *drawctxt)
+{
+	struct kgsl_context *context;
+	int level;
+
+	if (!drawctxt)
+		return NULL;
+
+	context = &(drawctxt->base);
+
+	/*
+	 * If preemption is disabled then everybody needs to go on the same
+	 * ringbuffer
+	 */
+
+	if (!adreno_is_preemption_enabled(adreno_dev))
+		return &(adreno_dev->ringbuffers[0]);
+
+	/*
+	 * Math to convert the priority field in context structure to an RB ID.
+	 * Divide up the context priority based on number of ringbuffer levels.
+	 */
+	level = context->priority / adreno_dev->num_ringbuffers;
+	if (level < adreno_dev->num_ringbuffers)
+		return &(adreno_dev->ringbuffers[level]);
+	else
+		return &(adreno_dev->ringbuffers[
+				adreno_dev->num_ringbuffers - 1]);
+}
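+
+/*
+ * Worked example (illustrative only): with, say, 4 ringbuffers and context
+ * priorities 0..15, level = priority / 4, so priorities 0-3 map to RB 0,
+ * 4-7 to RB 1, 8-11 to RB 2 and 12-15 to RB 3; any level past the last
+ * ringbuffer is clamped to the lowest-priority ringbuffer.
+ */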
+
+/*
+ * adreno_compare_prio_level() - Compares 2 priority levels based on enum values
+ * @p1: First priority level
+ * @p2: Second priority level
+ *
+ * Returns a value greater than 0 if p1 is the higher priority level, 0 if the
+ * levels are equal, or a value less than 0 otherwise
+ */
+static inline int adreno_compare_prio_level(int p1, int p2)
+{
+	return p2 - p1;
+}
+
+void adreno_readreg64(struct adreno_device *adreno_dev,
+		enum adreno_regs lo, enum adreno_regs hi, uint64_t *val);
+
+void adreno_writereg64(struct adreno_device *adreno_dev,
+		enum adreno_regs lo, enum adreno_regs hi, uint64_t val);
+
+unsigned int adreno_get_rptr(struct adreno_ringbuffer *rb);
+
+static inline bool adreno_rb_empty(struct adreno_ringbuffer *rb)
+{
+	return (adreno_get_rptr(rb) == rb->wptr);
+}
+
+static inline bool adreno_soft_fault_detect(struct adreno_device *adreno_dev)
+{
+	return adreno_dev->fast_hang_detect &&
+		!test_bit(ADRENO_DEVICE_ISDB_ENABLED, &adreno_dev->priv);
+}
+
+static inline bool adreno_long_ib_detect(struct adreno_device *adreno_dev)
+{
+	return adreno_dev->long_ib_detect &&
+		!test_bit(ADRENO_DEVICE_ISDB_ENABLED, &adreno_dev->priv);
+}
+
+/*
+ * adreno_support_64bit() - Check the ADRENO_64BIT feature flag on a 64-bit
+ * kernel; always returns false on a 32-bit kernel
+ * adreno_dev: The adreno device
+ */
+#if BITS_PER_LONG == 64
+static inline bool adreno_support_64bit(struct adreno_device *adreno_dev)
+{
+	return ADRENO_FEATURE(adreno_dev, ADRENO_64BIT);
+}
+#else
+static inline bool adreno_support_64bit(struct adreno_device *adreno_dev)
+{
+	return false;
+}
+#endif /*BITS_PER_LONG*/
+
+static inline void adreno_ringbuffer_set_global(
+		struct adreno_device *adreno_dev, int name)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	kgsl_sharedmem_writel(device,
+		&adreno_dev->ringbuffers[0].pagetable_desc,
+		PT_INFO_OFFSET(current_global_ptname), name);
+}
+
+static inline void adreno_ringbuffer_set_pagetable(struct adreno_ringbuffer *rb,
+		struct kgsl_pagetable *pt)
+{
+	struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned long flags;
+
+	spin_lock_irqsave(&rb->preempt_lock, flags);
+
+	kgsl_sharedmem_writel(device, &rb->pagetable_desc,
+		PT_INFO_OFFSET(current_rb_ptname), pt->name);
+
+	kgsl_sharedmem_writeq(device, &rb->pagetable_desc,
+		PT_INFO_OFFSET(ttbr0), kgsl_mmu_pagetable_get_ttbr0(pt));
+
+	kgsl_sharedmem_writel(device, &rb->pagetable_desc,
+		PT_INFO_OFFSET(contextidr),
+		kgsl_mmu_pagetable_get_contextidr(pt));
+
+	spin_unlock_irqrestore(&rb->preempt_lock, flags);
+}
+
+static inline unsigned int counter_delta(struct kgsl_device *device,
+			unsigned int reg, unsigned int *counter)
+{
+	unsigned int val;
+	unsigned int ret = 0;
+
+	/* Read the value */
+	kgsl_regread(device, reg, &val);
+
+	/* Return 0 for the first read */
+	if (*counter != 0) {
+		if (val < *counter)
+			ret = (0xFFFFFFFF - *counter) + val;
+		else
+			ret = val - *counter;
+	}
+
+	*counter = val;
+	return ret;
+}
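+
+/*
+ * Worked example (illustrative only): if the previous sample in *counter was
+ * 0xFFFFFFF0 and the register now reads 0x00000010, the counter wrapped, so
+ * the function returns (0xFFFFFFFF - 0xFFFFFFF0) + 0x10 = 0x1F instead of the
+ * bogus value a direct subtraction would give.
+ */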
+#endif /*__ADRENO_H */
diff --git a/drivers/gpu/msm/adreno_a3xx.c b/drivers/gpu/msm/adreno_a3xx.c
new file mode 100644
index 0000000..1a345e5
--- /dev/null
+++ b/drivers/gpu/msm/adreno_a3xx.c
@@ -0,0 +1,1921 @@
+/* Copyright (c) 2012-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/firmware.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/msm_kgsl.h>
+
+#include "kgsl.h"
+#include "adreno.h"
+#include "kgsl_sharedmem.h"
+#include "a3xx_reg.h"
+#include "adreno_a3xx.h"
+#include "adreno_a4xx.h"
+#include "a4xx_reg.h"
+#include "adreno_cp_parser.h"
+#include "adreno_trace.h"
+#include "adreno_pm4types.h"
+#include "adreno_perfcounter.h"
+#include "adreno_snapshot.h"
+
+/*
+ * Define registers for a3xx that contain addresses used by the
+ * cp parser logic
+ */
+const unsigned int a3xx_cp_addr_regs[ADRENO_CP_ADDR_MAX] = {
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_0,
+				A3XX_VSC_PIPE_DATA_ADDRESS_0),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_0,
+				A3XX_VSC_PIPE_DATA_LENGTH_0),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_1,
+				A3XX_VSC_PIPE_DATA_ADDRESS_1),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_1,
+				A3XX_VSC_PIPE_DATA_LENGTH_1),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_2,
+				A3XX_VSC_PIPE_DATA_ADDRESS_2),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_2,
+				A3XX_VSC_PIPE_DATA_LENGTH_2),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_3,
+				A3XX_VSC_PIPE_DATA_ADDRESS_3),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_3,
+				A3XX_VSC_PIPE_DATA_LENGTH_3),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_4,
+				A3XX_VSC_PIPE_DATA_ADDRESS_4),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_4,
+				A3XX_VSC_PIPE_DATA_LENGTH_4),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_5,
+				A3XX_VSC_PIPE_DATA_ADDRESS_5),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_5,
+				A3XX_VSC_PIPE_DATA_LENGTH_5),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_6,
+				A3XX_VSC_PIPE_DATA_ADDRESS_6),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_6,
+				A3XX_VSC_PIPE_DATA_LENGTH_6),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_7,
+				A3XX_VSC_PIPE_DATA_ADDRESS_7),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_7,
+				A3XX_VSC_PIPE_DATA_LENGTH_7),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_0,
+				A3XX_VFD_FETCH_INSTR_1_0),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_1,
+				A3XX_VFD_FETCH_INSTR_1_1),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_2,
+				A3XX_VFD_FETCH_INSTR_1_2),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_3,
+				A3XX_VFD_FETCH_INSTR_1_3),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_4,
+				A3XX_VFD_FETCH_INSTR_1_4),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_5,
+				A3XX_VFD_FETCH_INSTR_1_5),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_6,
+				A3XX_VFD_FETCH_INSTR_1_6),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_7,
+				A3XX_VFD_FETCH_INSTR_1_7),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_8,
+				A3XX_VFD_FETCH_INSTR_1_8),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_9,
+				A3XX_VFD_FETCH_INSTR_1_9),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_10,
+				A3XX_VFD_FETCH_INSTR_1_A),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_11,
+				A3XX_VFD_FETCH_INSTR_1_B),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_12,
+				A3XX_VFD_FETCH_INSTR_1_C),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_13,
+				A3XX_VFD_FETCH_INSTR_1_D),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_14,
+				A3XX_VFD_FETCH_INSTR_1_E),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_15,
+				A3XX_VFD_FETCH_INSTR_1_F),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_SIZE_ADDRESS,
+				A3XX_VSC_SIZE_ADDRESS),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_SP_VS_PVT_MEM_ADDR,
+				A3XX_SP_VS_PVT_MEM_ADDR_REG),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_SP_FS_PVT_MEM_ADDR,
+				A3XX_SP_FS_PVT_MEM_ADDR_REG),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_SP_VS_OBJ_START_REG,
+				A3XX_SP_VS_OBJ_START_REG),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_SP_FS_OBJ_START_REG,
+				A3XX_SP_FS_OBJ_START_REG),
+};
+
+static unsigned int adreno_a3xx_rbbm_clock_ctl_default(struct adreno_device
+							*adreno_dev)
+{
+	if (adreno_is_a320(adreno_dev))
+		return A320_RBBM_CLOCK_CTL_DEFAULT;
+	else if (adreno_is_a330v2(adreno_dev))
+		return A3XX_RBBM_CLOCK_CTL_DEFAULT;
+	else if (adreno_is_a330(adreno_dev))
+		return A330_RBBM_CLOCK_CTL_DEFAULT;
+	return A3XX_RBBM_CLOCK_CTL_DEFAULT;
+}
+
+static const unsigned int _a3xx_pwron_fixup_fs_instructions[] = {
+	0x00000000, 0x302CC300, 0x00000000, 0x302CC304,
+	0x00000000, 0x302CC308, 0x00000000, 0x302CC30C,
+	0x00000000, 0x302CC310, 0x00000000, 0x302CC314,
+	0x00000000, 0x302CC318, 0x00000000, 0x302CC31C,
+	0x00000000, 0x302CC320, 0x00000000, 0x302CC324,
+	0x00000000, 0x302CC328, 0x00000000, 0x302CC32C,
+	0x00000000, 0x302CC330, 0x00000000, 0x302CC334,
+	0x00000000, 0x302CC338, 0x00000000, 0x302CC33C,
+	0x00000000, 0x00000400, 0x00020000, 0x63808003,
+	0x00060004, 0x63828007, 0x000A0008, 0x6384800B,
+	0x000E000C, 0x6386800F, 0x00120010, 0x63888013,
+	0x00160014, 0x638A8017, 0x001A0018, 0x638C801B,
+	0x001E001C, 0x638E801F, 0x00220020, 0x63908023,
+	0x00260024, 0x63928027, 0x002A0028, 0x6394802B,
+	0x002E002C, 0x6396802F, 0x00320030, 0x63988033,
+	0x00360034, 0x639A8037, 0x003A0038, 0x639C803B,
+	0x003E003C, 0x639E803F, 0x00000000, 0x00000400,
+	0x00000003, 0x80D60003, 0x00000007, 0x80D60007,
+	0x0000000B, 0x80D6000B, 0x0000000F, 0x80D6000F,
+	0x00000013, 0x80D60013, 0x00000017, 0x80D60017,
+	0x0000001B, 0x80D6001B, 0x0000001F, 0x80D6001F,
+	0x00000023, 0x80D60023, 0x00000027, 0x80D60027,
+	0x0000002B, 0x80D6002B, 0x0000002F, 0x80D6002F,
+	0x00000033, 0x80D60033, 0x00000037, 0x80D60037,
+	0x0000003B, 0x80D6003B, 0x0000003F, 0x80D6003F,
+	0x00000000, 0x03000000, 0x00000000, 0x00000000,
+};
+
+static void a3xx_efuse_speed_bin(struct adreno_device *adreno_dev)
+{
+	unsigned int val;
+	unsigned int speed_bin[3];
+	struct kgsl_device *device = &adreno_dev->dev;
+
+	if (of_property_read_u32_array(device->pdev->dev.of_node,
+		"qcom,gpu-speed-bin", speed_bin, 3))
+		return;
+
+	adreno_efuse_read_u32(adreno_dev, speed_bin[0], &val);
+
+	adreno_dev->speed_bin = (val & speed_bin[1]) >> speed_bin[2];
+}
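+
+/*
+ * Illustrative device tree usage (values are placeholders): the property
+ * carries an efuse offset, a bit mask and a shift, in that order.
+ *
+ *	qcom,gpu-speed-bin = <0x41a0 0x1fe00000 21>;
+ */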
+
+static const struct {
+	int (*check)(struct adreno_device *adreno_dev);
+	void (*func)(struct adreno_device *adreno_dev);
+} a3xx_efuse_funcs[] = {
+	{ adreno_is_a306a, a3xx_efuse_speed_bin },
+};
+
+static void a3xx_check_features(struct adreno_device *adreno_dev)
+{
+	unsigned int i;
+
+	if (adreno_efuse_map(adreno_dev))
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(a3xx_efuse_funcs); i++) {
+		if (a3xx_efuse_funcs[i].check(adreno_dev))
+			a3xx_efuse_funcs[i].func(adreno_dev);
+	}
+
+	adreno_efuse_unmap(adreno_dev);
+}
+
+/**
+ * _a3xx_pwron_fixup() - Initialize a special command buffer to run a
+ * post-power collapse shader workaround
+ * @adreno_dev: Pointer to an adreno_device struct
+ *
+ * Some targets require a special workaround shader to be executed after
+ * power-collapse.  Construct the IB once at init time and keep it
+ * handy
+ *
+ * Returns: 0 on success or negative on error
+ */
+static int _a3xx_pwron_fixup(struct adreno_device *adreno_dev)
+{
+	unsigned int *cmds;
+	int count = ARRAY_SIZE(_a3xx_pwron_fixup_fs_instructions);
+	int ret;
+
+	/* Return if the fixup is already in place */
+	if (test_bit(ADRENO_DEVICE_PWRON_FIXUP, &adreno_dev->priv))
+		return 0;
+
+	ret = kgsl_allocate_global(KGSL_DEVICE(adreno_dev),
+		&adreno_dev->pwron_fixup, PAGE_SIZE,
+		KGSL_MEMFLAGS_GPUREADONLY, 0, "pwron_fixup");
+
+	if (ret)
+		return ret;
+
+	cmds = adreno_dev->pwron_fixup.hostptr;
+
+	*cmds++ = cp_type0_packet(A3XX_UCHE_CACHE_INVALIDATE0_REG, 2);
+	*cmds++ = 0x00000000;
+	*cmds++ = 0x90000000;
+	*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type3_packet(CP_REG_RMW, 3);
+	*cmds++ = A3XX_RBBM_CLOCK_CTL;
+	*cmds++ = 0xFFFCFFFF;
+	*cmds++ = 0x00010000;
+	*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_0_REG, 1);
+	*cmds++ = 0x1E000150;
+	*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
+	*cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
+	*cmds++ = 0x1E000150;
+	*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_0_REG, 1);
+	*cmds++ = 0x1E000150;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_1_REG, 1);
+	*cmds++ = 0x00000040;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_2_REG, 1);
+	*cmds++ = 0x80000000;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_3_REG, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_VS_CONTROL_REG, 1);
+	*cmds++ = 0x00000001;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_FS_CONTROL_REG, 1);
+	*cmds++ = 0x0D001002;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CONST_VSPRESV_RANGE_REG, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CONST_FSPRESV_RANGE_REG, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_0_REG, 1);
+	*cmds++ = 0x00401101;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_1_REG, 1);
+	*cmds++ = 0x00000400;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_2_REG, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_3_REG, 1);
+	*cmds++ = 0x00000001;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_4_REG, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_5_REG, 1);
+	*cmds++ = 0x00000001;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CL_NDRANGE_6_REG, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CL_CONTROL_0_REG, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CL_CONTROL_1_REG, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CL_KERNEL_CONST_REG, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CL_KERNEL_GROUP_X_REG, 1);
+	*cmds++ = 0x00000010;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CL_KERNEL_GROUP_Y_REG, 1);
+	*cmds++ = 0x00000001;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CL_KERNEL_GROUP_Z_REG, 1);
+	*cmds++ = 0x00000001;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CL_WG_OFFSET_REG, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_SP_CTRL_REG, 1);
+	*cmds++ = 0x00040000;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_CTRL_REG0, 1);
+	*cmds++ = 0x0000000A;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_CTRL_REG1, 1);
+	*cmds++ = 0x00000001;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_PARAM_REG, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_0, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_1, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_2, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_3, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_4, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_5, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_6, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_OUT_REG_7, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_VPC_DST_REG_0, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_VPC_DST_REG_1, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_VPC_DST_REG_2, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_VPC_DST_REG_3, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_OBJ_OFFSET_REG, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_OBJ_START_REG, 1);
+	*cmds++ = 0x00000004;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_PARAM_REG, 1);
+	*cmds++ = 0x04008001;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_ADDR_REG, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_PVT_MEM_SIZE_REG, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_VS_LENGTH_REG, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_FS_CTRL_REG0, 1);
+	*cmds++ = 0x0DB0400A;
+	*cmds++ = cp_type0_packet(A3XX_SP_FS_CTRL_REG1, 1);
+	*cmds++ = 0x00300402;
+	*cmds++ = cp_type0_packet(A3XX_SP_FS_OBJ_OFFSET_REG, 1);
+	*cmds++ = 0x00010000;
+	*cmds++ = cp_type0_packet(A3XX_SP_FS_OBJ_START_REG, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_FS_PVT_MEM_PARAM_REG, 1);
+	*cmds++ = 0x04008001;
+	*cmds++ = cp_type0_packet(A3XX_SP_FS_PVT_MEM_ADDR_REG, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_FS_PVT_MEM_SIZE_REG, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_FS_FLAT_SHAD_MODE_REG_1, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_FS_OUTPUT_REG, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_FS_MRT_REG_0, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_FS_MRT_REG_1, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_FS_MRT_REG_2, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_FS_MRT_REG_3, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_FS_IMAGE_OUTPUT_REG_0, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_FS_IMAGE_OUTPUT_REG_1, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_FS_IMAGE_OUTPUT_REG_2, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_FS_IMAGE_OUTPUT_REG_3, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_SP_FS_LENGTH_REG, 1);
+	*cmds++ = 0x0000000D;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_CLIP_CNTL, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_GB_CLIP_ADJ, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_XOFFSET, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_XSCALE, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_YOFFSET, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_YSCALE, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_ZOFFSET, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_VPORT_ZSCALE, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X0, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y0, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z0, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W0, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X1, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y1, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z1, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W1, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X2, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y2, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z2, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W2, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X3, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y3, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z3, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W3, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X4, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y4, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z4, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W4, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_X5, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Y5, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_Z5, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_CL_USER_PLANE_W5, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_SU_POINT_MINMAX, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_SU_POINT_SIZE, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_SU_POLY_OFFSET_OFFSET, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_SU_POLY_OFFSET_SCALE, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_SU_MODE_CONTROL, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_SC_CONTROL, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_SC_SCREEN_SCISSOR_TL, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_SC_SCREEN_SCISSOR_BR, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_SC_WINDOW_SCISSOR_BR, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_SC_WINDOW_SCISSOR_TL, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_TSE_DEBUG_ECO, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_PERFCOUNTER0_SELECT, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_PERFCOUNTER1_SELECT, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_PERFCOUNTER2_SELECT, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_GRAS_PERFCOUNTER3_SELECT, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_MODE_CONTROL, 1);
+	*cmds++ = 0x00008000;
+	*cmds++ = cp_type0_packet(A3XX_RB_RENDER_CONTROL, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_MSAA_CONTROL, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_ALPHA_REFERENCE, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_MRT_CONTROL0, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_MRT_CONTROL1, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_MRT_CONTROL2, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_MRT_CONTROL3, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_INFO0, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_INFO1, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_INFO2, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_INFO3, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_BASE0, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_BASE1, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_BASE2, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_MRT_BUF_BASE3, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_MRT_BLEND_CONTROL0, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_MRT_BLEND_CONTROL1, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_MRT_BLEND_CONTROL2, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_MRT_BLEND_CONTROL3, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_BLEND_RED, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_BLEND_GREEN, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_BLEND_BLUE, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_BLEND_ALPHA, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_CLEAR_COLOR_DW0, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_CLEAR_COLOR_DW1, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_CLEAR_COLOR_DW2, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_CLEAR_COLOR_DW3, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_COPY_CONTROL, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_COPY_DEST_BASE, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_COPY_DEST_PITCH, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_COPY_DEST_INFO, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_DEPTH_CONTROL, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_DEPTH_CLEAR, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_DEPTH_BUF_INFO, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_DEPTH_BUF_PITCH, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_STENCIL_CONTROL, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_STENCIL_CLEAR, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_STENCIL_BUF_INFO, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_STENCIL_BUF_PITCH, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_STENCIL_REF_MASK, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_STENCIL_REF_MASK_BF, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_LRZ_VSC_CONTROL, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_WINDOW_OFFSET, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_SAMPLE_COUNT_CONTROL, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_SAMPLE_COUNT_ADDR, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_Z_CLAMP_MIN, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_Z_CLAMP_MAX, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_GMEM_BASE_ADDR, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_DEBUG_ECO_CONTROLS_ADDR, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_PERFCOUNTER0_SELECT, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_PERFCOUNTER1_SELECT, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_RB_FRAME_BUFFER_DIMENSION, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type3_packet(CP_LOAD_STATE, 4);
+	*cmds++ = (1 << CP_LOADSTATE_DSTOFFSET_SHIFT) |
+		(0 << CP_LOADSTATE_STATESRC_SHIFT) |
+		(6 << CP_LOADSTATE_STATEBLOCKID_SHIFT) |
+		(1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
+	*cmds++ = (1 << CP_LOADSTATE_STATETYPE_SHIFT) |
+		(0 << CP_LOADSTATE_EXTSRCADDR_SHIFT);
+	*cmds++ = 0x00400000;
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type3_packet(CP_LOAD_STATE, 4);
+	*cmds++ = (2 << CP_LOADSTATE_DSTOFFSET_SHIFT) |
+		(6 << CP_LOADSTATE_STATEBLOCKID_SHIFT) |
+		(1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
+	*cmds++ = (1 << CP_LOADSTATE_STATETYPE_SHIFT);
+	*cmds++ = 0x00400220;
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type3_packet(CP_LOAD_STATE, 4);
+	*cmds++ = (6 << CP_LOADSTATE_STATEBLOCKID_SHIFT) |
+		(1 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
+	*cmds++ = (1 << CP_LOADSTATE_STATETYPE_SHIFT);
+	*cmds++ = 0x00000000;
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type3_packet(CP_LOAD_STATE, 2 + count);
+	*cmds++ = (6 << CP_LOADSTATE_STATEBLOCKID_SHIFT) |
+		(13 << CP_LOADSTATE_NUMOFUNITS_SHIFT);
+	*cmds++ = 0x00000000;
+
+	memcpy(cmds, _a3xx_pwron_fixup_fs_instructions, count << 2);
+
+	cmds += count;
+
+	*cmds++ = cp_type3_packet(CP_EXEC_CL, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CL_CONTROL_0_REG, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A3XX_HLSQ_CONTROL_0_REG, 1);
+	*cmds++ = 0x1E000150;
+	*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type3_packet(CP_SET_CONSTANT, 2);
+	*cmds++ = CP_REG(A3XX_HLSQ_CONTROL_0_REG);
+	*cmds++ = 0x1E000050;
+	*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type3_packet(CP_REG_RMW, 3);
+	*cmds++ = A3XX_RBBM_CLOCK_CTL;
+	*cmds++ = 0xFFFCFFFF;
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+	*cmds++ = 0x00000000;
+
+	/*
+	 * Remember the number of dwords in the command buffer for when we
+	 * program the indirect buffer call in the ringbuffer
+	 */
+	adreno_dev->pwron_fixup_dwords =
+		(cmds - (unsigned int *) adreno_dev->pwron_fixup.hostptr);
+
+	/* Mark the flag in ->priv to show that we have the fix */
+	set_bit(ADRENO_DEVICE_PWRON_FIXUP, &adreno_dev->priv);
+	return 0;
+}
+
+static void a3xx_platform_setup(struct adreno_device *adreno_dev)
+{
+	struct adreno_gpudev *gpudev;
+
+	if (adreno_is_a306(adreno_dev) || adreno_is_a306a(adreno_dev)
+			|| adreno_is_a304(adreno_dev)) {
+		gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+		gpudev->vbif_xin_halt_ctrl0_mask =
+				A30X_VBIF_XIN_HALT_CTRL0_MASK;
+	}
+
+	/* Check efuse bits for various capabilities */
+	a3xx_check_features(adreno_dev);
+}
+
+static int a3xx_send_me_init(struct adreno_device *adreno_dev,
+			 struct adreno_ringbuffer *rb)
+{
+	unsigned int *cmds;
+	int ret;
+
+	cmds = adreno_ringbuffer_allocspace(rb, 18);
+	if (IS_ERR(cmds))
+		return PTR_ERR(cmds);
+	if (cmds == NULL)
+		return -ENOSPC;
+
+	*cmds++ = cp_type3_packet(CP_ME_INIT, 17);
+
+	*cmds++ = 0x000003f7;
+	*cmds++ = 0x00000000;
+	*cmds++ = 0x00000000;
+	*cmds++ = 0x00000000;
+	*cmds++ = 0x00000080;
+	*cmds++ = 0x00000100;
+	*cmds++ = 0x00000180;
+	*cmds++ = 0x00006600;
+	*cmds++ = 0x00000150;
+	*cmds++ = 0x0000014e;
+	*cmds++ = 0x00000154;
+	*cmds++ = 0x00000001;
+	*cmds++ = 0x00000000;
+	*cmds++ = 0x00000000;
+
+	/* Enable protected mode registers for A3XX/A4XX */
+	*cmds++ = 0x20000000;
+
+	*cmds++ = 0x00000000;
+	*cmds++ = 0x00000000;
+
+	ret = adreno_ringbuffer_submit_spin(rb, NULL, 2000);
+	if (ret) {
+		struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+		dev_err(device->dev, "CP initialization failed to idle\n");
+		kgsl_device_snapshot(device, NULL);
+	}
+
+	return ret;
+}
+
+static int a3xx_rb_start(struct adreno_device *adreno_dev,
+			 unsigned int start_type)
+{
+	struct adreno_ringbuffer *rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev);
+	int ret;
+
+	/*
+	 * The size of the ringbuffer in the hardware is the log2
+	 * representation of the size in quadwords (sizedwords / 2).
+	 * Also disable the host RPTR shadow register as it might be unreliable
+	 * in certain circumstances.
+	 */
+
+	adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_CNTL,
+		(ilog2(KGSL_RB_DWORDS >> 1) & 0x3F) |
+		(1 << 27));
+
+	adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_BASE,
+			rb->buffer_desc.gpuaddr);
+
+	ret = a3xx_microcode_load(adreno_dev, start_type);
+	if (ret == 0) {
+		/* clear ME_HALT to start micro engine */
+		adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_CNTL, 0);
+
+		ret = a3xx_send_me_init(adreno_dev, rb);
+	}
+
+	return ret;
+}
+
+/*
+ * a3xx_init() - Initialize gpu specific data
+ * @adreno_dev: Pointer to adreno device
+ */
+static void a3xx_init(struct adreno_device *adreno_dev)
+{
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+
+	_a3xx_pwron_fixup(adreno_dev);
+
+	/* Adjust snapshot section sizes according to core */
+	if ((adreno_is_a330(adreno_dev) || adreno_is_a305b(adreno_dev))) {
+		gpudev->snapshot_data->sect_sizes->cp_pfp =
+					A320_SNAPSHOT_CP_STATE_SECTION_SIZE;
+		gpudev->snapshot_data->sect_sizes->roq =
+					A320_SNAPSHOT_ROQ_SECTION_SIZE;
+		gpudev->snapshot_data->sect_sizes->cp_merciu =
+					A320_SNAPSHOT_CP_MERCIU_SECTION_SIZE;
+	}
+}
+
+/*
+ * a3xx_err_callback() - Call back for a3xx error interrupts
+ * @adreno_dev: Pointer to device
+ * @bit: Interrupt bit
+ */
+static void a3xx_err_callback(struct adreno_device *adreno_dev, int bit)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int reg;
+
+	switch (bit) {
+	case A3XX_INT_RBBM_AHB_ERROR: {
+		kgsl_regread(device, A3XX_RBBM_AHB_ERROR_STATUS, &reg);
+
+		/*
+		 * Return the word address of the erroring register so that it
+		 * matches the register specification
+		 */
+		KGSL_DRV_CRIT_RATELIMIT(device,
+			"RBBM | AHB bus error | %s | addr=%x | ports=%x:%x\n",
+			reg & (1 << 28) ? "WRITE" : "READ",
+			(reg & 0xFFFFF) >> 2, (reg >> 20) & 0x3,
+			(reg >> 24) & 0xF);
+
+		/* Clear the error */
+		kgsl_regwrite(device, A3XX_RBBM_AHB_CMD, (1 << 3));
+		break;
+	}
+	case A3XX_INT_RBBM_ATB_BUS_OVERFLOW:
+		KGSL_DRV_CRIT_RATELIMIT(device, "RBBM: ATB bus overflow\n");
+		break;
+	case A3XX_INT_CP_T0_PACKET_IN_IB:
+		KGSL_DRV_CRIT_RATELIMIT(device,
+			"ringbuffer T0 packet in IB interrupt\n");
+		break;
+	case A3XX_INT_CP_OPCODE_ERROR:
+		KGSL_DRV_CRIT_RATELIMIT(device,
+			"ringbuffer opcode error interrupt\n");
+		break;
+	case A3XX_INT_CP_RESERVED_BIT_ERROR:
+		KGSL_DRV_CRIT_RATELIMIT(device,
+			"ringbuffer reserved bit error interrupt\n");
+		break;
+	case A3XX_INT_CP_HW_FAULT:
+		kgsl_regread(device, A3XX_CP_HW_FAULT, &reg);
+		KGSL_DRV_CRIT_RATELIMIT(device,
+			"CP | Ringbuffer HW fault | status=%x\n", reg);
+		break;
+	case A3XX_INT_CP_REG_PROTECT_FAULT:
+		kgsl_regread(device, A3XX_CP_PROTECT_STATUS, &reg);
+		KGSL_DRV_CRIT_RATELIMIT(device,
+			"CP | Protected mode error| %s | addr=%x\n",
+			reg & (1 << 24) ? "WRITE" : "READ",
+			(reg & 0xFFFFF) >> 2);
+		break;
+	case A3XX_INT_CP_AHB_ERROR_HALT:
+		KGSL_DRV_CRIT_RATELIMIT(device,
+			"ringbuffer AHB error interrupt\n");
+		break;
+	case A3XX_INT_UCHE_OOB_ACCESS:
+		KGSL_DRV_CRIT_RATELIMIT(device, "UCHE: Out of bounds access\n");
+		break;
+	default:
+		KGSL_DRV_CRIT_RATELIMIT(device, "Unknown interrupt\n");
+	}
+}
+
+#define A3XX_INT_MASK \
+	((1 << A3XX_INT_RBBM_AHB_ERROR) |        \
+	 (1 << A3XX_INT_RBBM_ATB_BUS_OVERFLOW) | \
+	 (1 << A3XX_INT_CP_T0_PACKET_IN_IB) |    \
+	 (1 << A3XX_INT_CP_OPCODE_ERROR) |       \
+	 (1 << A3XX_INT_CP_RESERVED_BIT_ERROR) | \
+	 (1 << A3XX_INT_CP_HW_FAULT) |           \
+	 (1 << A3XX_INT_CP_IB1_INT) |            \
+	 (1 << A3XX_INT_CP_IB2_INT) |            \
+	 (1 << A3XX_INT_CP_RB_INT) |             \
+	 (1 << A3XX_INT_CACHE_FLUSH_TS) |	 \
+	 (1 << A3XX_INT_CP_REG_PROTECT_FAULT) |  \
+	 (1 << A3XX_INT_CP_AHB_ERROR_HALT) |     \
+	 (1 << A3XX_INT_UCHE_OOB_ACCESS))
+
+static struct adreno_irq_funcs a3xx_irq_funcs[32] = {
+	ADRENO_IRQ_CALLBACK(NULL),                    /* 0 - RBBM_GPU_IDLE */
+	ADRENO_IRQ_CALLBACK(a3xx_err_callback),  /* 1 - RBBM_AHB_ERROR */
+	ADRENO_IRQ_CALLBACK(NULL),  /* 2 - RBBM_REG_TIMEOUT */
+	ADRENO_IRQ_CALLBACK(NULL),  /* 3 - RBBM_ME_MS_TIMEOUT */
+	ADRENO_IRQ_CALLBACK(NULL),  /* 4 - RBBM_PFP_MS_TIMEOUT */
+	ADRENO_IRQ_CALLBACK(a3xx_err_callback),  /* 5 - RBBM_ATB_BUS_OVERFLOW */
+	ADRENO_IRQ_CALLBACK(NULL),  /* 6 - RBBM_VFD_ERROR */
+	ADRENO_IRQ_CALLBACK(NULL),	/* 7 - CP_SW */
+	ADRENO_IRQ_CALLBACK(a3xx_err_callback),  /* 8 - CP_T0_PACKET_IN_IB */
+	ADRENO_IRQ_CALLBACK(a3xx_err_callback),  /* 9 - CP_OPCODE_ERROR */
+	/* 10 - CP_RESERVED_BIT_ERROR */
+	ADRENO_IRQ_CALLBACK(a3xx_err_callback),
+	ADRENO_IRQ_CALLBACK(a3xx_err_callback),  /* 11 - CP_HW_FAULT */
+	ADRENO_IRQ_CALLBACK(NULL),	             /* 12 - CP_DMA */
+	ADRENO_IRQ_CALLBACK(adreno_cp_callback),   /* 13 - CP_IB2_INT */
+	ADRENO_IRQ_CALLBACK(adreno_cp_callback),   /* 14 - CP_IB1_INT */
+	ADRENO_IRQ_CALLBACK(adreno_cp_callback),   /* 15 - CP_RB_INT */
+	/* 16 - CP_REG_PROTECT_FAULT */
+	ADRENO_IRQ_CALLBACK(a3xx_err_callback),
+	ADRENO_IRQ_CALLBACK(NULL),	       /* 17 - CP_RB_DONE_TS */
+	ADRENO_IRQ_CALLBACK(NULL),	       /* 18 - CP_VS_DONE_TS */
+	ADRENO_IRQ_CALLBACK(NULL),	       /* 19 - CP_PS_DONE_TS */
+	ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 20 - CP_CACHE_FLUSH_TS */
+	/* 21 - CP_AHB_ERROR_FAULT */
+	ADRENO_IRQ_CALLBACK(a3xx_err_callback),
+	ADRENO_IRQ_CALLBACK(NULL),	       /* 22 - Unused */
+	ADRENO_IRQ_CALLBACK(NULL),	       /* 23 - Unused */
+	/* 24 - MISC_HANG_DETECT */
+	ADRENO_IRQ_CALLBACK(adreno_hang_int_callback),
+	ADRENO_IRQ_CALLBACK(a3xx_err_callback),  /* 25 - UCHE_OOB_ACCESS */
+};
+
+static struct adreno_irq a3xx_irq = {
+	.funcs = a3xx_irq_funcs,
+	.mask = A3XX_INT_MASK,
+};
+
+/* VBIF registers start after 0x3000 so use 0x0 as end of list marker */
+static const struct adreno_vbif_data a304_vbif[] = {
+	{ A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003 },
+	{0, 0},
+};
+
+static const struct adreno_vbif_data a305_vbif[] = {
+	/* Set up 16 deep read/write request queues */
+	{ A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010 },
+	{ A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010 },
+	{ A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010 },
+	{ A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010 },
+	{ A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303 },
+	{ A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010 },
+	{ A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010 },
+	/* Enable WR-REQ */
+	{ A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000FF },
+	/* Set up round robin arbitration between both AXI ports */
+	{ A3XX_VBIF_ARB_CTL, 0x00000030 },
+	/* Set up AOOO */
+	{ A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003C },
+	{ A3XX_VBIF_OUT_AXI_AOOO, 0x003C003C },
+	{0, 0},
+};
+
+static const struct adreno_vbif_data a305b_vbif[] = {
+	{ A3XX_VBIF_IN_RD_LIM_CONF0, 0x00181818 },
+	{ A3XX_VBIF_IN_WR_LIM_CONF0, 0x00181818 },
+	{ A3XX_VBIF_OUT_RD_LIM_CONF0, 0x00000018 },
+	{ A3XX_VBIF_OUT_WR_LIM_CONF0, 0x00000018 },
+	{ A3XX_VBIF_DDR_OUT_MAX_BURST, 0x00000303 },
+	{ A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003 },
+	{0, 0},
+};
+
+static const struct adreno_vbif_data a305c_vbif[] = {
+	{ A3XX_VBIF_IN_RD_LIM_CONF0, 0x00101010 },
+	{ A3XX_VBIF_IN_WR_LIM_CONF0, 0x00101010 },
+	{ A3XX_VBIF_OUT_RD_LIM_CONF0, 0x00000010 },
+	{ A3XX_VBIF_OUT_WR_LIM_CONF0, 0x00000010 },
+	{ A3XX_VBIF_DDR_OUT_MAX_BURST, 0x00000101 },
+	{ A3XX_VBIF_ARB_CTL, 0x00000010 },
+	/* Set up AOOO */
+	{ A3XX_VBIF_OUT_AXI_AOOO_EN, 0x00000007 },
+	{ A3XX_VBIF_OUT_AXI_AOOO, 0x00070007 },
+	{0, 0},
+};
+
+static const struct adreno_vbif_data a306_vbif[] = {
+	{ A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003 },
+	{ A3XX_VBIF_OUT_RD_LIM_CONF0, 0x0000000A },
+	{ A3XX_VBIF_OUT_WR_LIM_CONF0, 0x0000000A },
+	{0, 0},
+};
+
+static const struct adreno_vbif_data a306a_vbif[] = {
+	{ A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003 },
+	{ A3XX_VBIF_OUT_RD_LIM_CONF0, 0x00000010 },
+	{ A3XX_VBIF_OUT_WR_LIM_CONF0, 0x00000010 },
+	{0, 0},
+};
+
+static const struct adreno_vbif_data a310_vbif[] = {
+	{ A3XX_VBIF_ABIT_SORT, 0x0001000F },
+	{ A3XX_VBIF_ABIT_SORT_CONF, 0x000000A4 },
+	/* Enable WR-REQ */
+	{ A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001 },
+	/* Set up VBIF_ROUND_ROBIN_QOS_ARB */
+	{ A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x3 },
+	{ A3XX_VBIF_IN_RD_LIM_CONF0, 0x18180C0C },
+	{ A3XX_VBIF_IN_WR_LIM_CONF0, 0x1818000C },
+	{0, 0},
+};
+
+static const struct adreno_vbif_data a320_vbif[] = {
+	/* Set up 16 deep read/write request queues */
+	{ A3XX_VBIF_IN_RD_LIM_CONF0, 0x10101010 },
+	{ A3XX_VBIF_IN_RD_LIM_CONF1, 0x10101010 },
+	{ A3XX_VBIF_OUT_RD_LIM_CONF0, 0x10101010 },
+	{ A3XX_VBIF_OUT_WR_LIM_CONF0, 0x10101010 },
+	{ A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303 },
+	{ A3XX_VBIF_IN_WR_LIM_CONF0, 0x10101010 },
+	{ A3XX_VBIF_IN_WR_LIM_CONF1, 0x10101010 },
+	/* Enable WR-REQ */
+	{ A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x0000FF },
+	/* Set up round robin arbitration between both AXI ports */
+	{ A3XX_VBIF_ARB_CTL, 0x00000030 },
+	/* Set up AOOO */
+	{ A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003C },
+	{ A3XX_VBIF_OUT_AXI_AOOO, 0x003C003C },
+	/* Enable 1K sort */
+	{ A3XX_VBIF_ABIT_SORT, 0x000000FF },
+	{ A3XX_VBIF_ABIT_SORT_CONF, 0x000000A4 },
+	{0, 0},
+};
+
+static const struct adreno_vbif_data a330_vbif[] = {
+	/* Set up 16 deep read/write request queues */
+	{ A3XX_VBIF_IN_RD_LIM_CONF0, 0x18181818 },
+	{ A3XX_VBIF_IN_RD_LIM_CONF1, 0x00001818 },
+	{ A3XX_VBIF_OUT_RD_LIM_CONF0, 0x00001818 },
+	{ A3XX_VBIF_OUT_WR_LIM_CONF0, 0x00001818 },
+	{ A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303 },
+	{ A3XX_VBIF_IN_WR_LIM_CONF0, 0x18181818 },
+	{ A3XX_VBIF_IN_WR_LIM_CONF1, 0x00001818 },
+	/* Enable WR-REQ */
+	{ A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003F },
+	/* Set up round robin arbitration between both AXI ports */
+	{ A3XX_VBIF_ARB_CTL, 0x00000030 },
+	/* Set up VBIF_ROUND_ROBIN_QOS_ARB */
+	{ A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0001 },
+	/* Set up AOOO */
+	{ A3XX_VBIF_OUT_AXI_AOOO_EN, 0x0000003F },
+	{ A3XX_VBIF_OUT_AXI_AOOO, 0x003F003F },
+	/* Enable 1K sort */
+	{ A3XX_VBIF_ABIT_SORT, 0x0001003F },
+	{ A3XX_VBIF_ABIT_SORT_CONF, 0x000000A4 },
+	/* Disable VBIF clock gating. This is to allow AXI to run at a
+	 * higher frequency than the GPU.
+	 */
+	{ A3XX_VBIF_CLKON, 1 },
+	{0, 0},
+};
+
+/*
+ * Most of the VBIF registers on 8974v2 have the correct values at power on, so
+ * we won't modify those if we don't need to
+ */
+static const struct adreno_vbif_data a330v2_vbif[] = {
+	/* Enable 1k sort */
+	{ A3XX_VBIF_ABIT_SORT, 0x0001003F },
+	{ A3XX_VBIF_ABIT_SORT_CONF, 0x000000A4 },
+	/* Enable WR-REQ */
+	{ A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x00003F },
+	{ A3XX_VBIF_DDR_OUT_MAX_BURST, 0x0000303 },
+	/* Set up VBIF_ROUND_ROBIN_QOS_ARB */
+	{ A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003 },
+	{0, 0},
+};
+
+/*
+ * Most of the VBIF registers on a330v2.1 have the correct values at power on,
+ * so we won't modify those if we don't need to
+ */
+static const struct adreno_vbif_data a330v21_vbif[] = {
+	/* Enable WR-REQ */
+	{ A3XX_VBIF_GATE_OFF_WRREQ_EN, 0x1 },
+	/* Set up VBIF_ROUND_ROBIN_QOS_ARB */
+	{ A3XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x0003 },
+	{ A3XX_VBIF_IN_RD_LIM_CONF0, 0x18180c0c },
+	{0, 0},
+};
+
+static const struct adreno_vbif_platform a3xx_vbif_platforms[] = {
+	{ adreno_is_a304, a304_vbif },
+	{ adreno_is_a305, a305_vbif },
+	{ adreno_is_a305c, a305c_vbif },
+	{ adreno_is_a306, a306_vbif },
+	{ adreno_is_a306a, a306a_vbif },
+	{ adreno_is_a310, a310_vbif },
+	{ adreno_is_a320, a320_vbif },
+	/* A330v2.1 needs to be ahead of A330v2 so the right device matches */
+	{ adreno_is_a330v21, a330v21_vbif},
+	/* A330v2 needs to be ahead of A330 so the right device matches */
+	{ adreno_is_a330v2, a330v2_vbif },
+	{ adreno_is_a330, a330_vbif },
+	{ adreno_is_a305b, a305b_vbif },
+};
+
+/*
+ * Define the available perfcounter groups - these get used by
+ * adreno_perfcounter_get and adreno_perfcounter_put
+ */
+
+static struct adreno_perfcount_register a3xx_perfcounters_cp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_CP_0_LO,
+		A3XX_RBBM_PERFCTR_CP_0_HI, 0, A3XX_CP_PERFCOUNTER_SELECT },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_rbbm[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RBBM_0_LO,
+		A3XX_RBBM_PERFCTR_RBBM_0_HI, 1, A3XX_RBBM_PERFCOUNTER0_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RBBM_1_LO,
+		A3XX_RBBM_PERFCTR_RBBM_1_HI, 2, A3XX_RBBM_PERFCOUNTER1_SELECT },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_pc[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_0_LO,
+		A3XX_RBBM_PERFCTR_PC_0_HI, 3, A3XX_PC_PERFCOUNTER0_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_1_LO,
+		A3XX_RBBM_PERFCTR_PC_1_HI, 4, A3XX_PC_PERFCOUNTER1_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_2_LO,
+		A3XX_RBBM_PERFCTR_PC_2_HI, 5, A3XX_PC_PERFCOUNTER2_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PC_3_LO,
+		A3XX_RBBM_PERFCTR_PC_3_HI, 6, A3XX_PC_PERFCOUNTER3_SELECT },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_vfd[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VFD_0_LO,
+		A3XX_RBBM_PERFCTR_VFD_0_HI, 7, A3XX_VFD_PERFCOUNTER0_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VFD_1_LO,
+		A3XX_RBBM_PERFCTR_VFD_1_HI, 8, A3XX_VFD_PERFCOUNTER1_SELECT },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_hlsq[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_0_LO,
+		A3XX_RBBM_PERFCTR_HLSQ_0_HI, 9,
+		A3XX_HLSQ_PERFCOUNTER0_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_1_LO,
+		A3XX_RBBM_PERFCTR_HLSQ_1_HI, 10,
+		A3XX_HLSQ_PERFCOUNTER1_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_2_LO,
+		A3XX_RBBM_PERFCTR_HLSQ_2_HI, 11,
+		A3XX_HLSQ_PERFCOUNTER2_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_3_LO,
+		A3XX_RBBM_PERFCTR_HLSQ_3_HI, 12,
+		A3XX_HLSQ_PERFCOUNTER3_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_4_LO,
+		A3XX_RBBM_PERFCTR_HLSQ_4_HI, 13,
+		A3XX_HLSQ_PERFCOUNTER4_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_HLSQ_5_LO,
+		A3XX_RBBM_PERFCTR_HLSQ_5_HI, 14,
+		A3XX_HLSQ_PERFCOUNTER5_SELECT },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_vpc[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VPC_0_LO,
+		A3XX_RBBM_PERFCTR_VPC_0_HI, 15, A3XX_VPC_PERFCOUNTER0_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_VPC_1_LO,
+		A3XX_RBBM_PERFCTR_VPC_1_HI, 16, A3XX_VPC_PERFCOUNTER1_SELECT },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_tse[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TSE_0_LO,
+		A3XX_RBBM_PERFCTR_TSE_0_HI, 17, A3XX_GRAS_PERFCOUNTER0_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TSE_1_LO,
+		A3XX_RBBM_PERFCTR_TSE_1_HI, 18, A3XX_GRAS_PERFCOUNTER1_SELECT },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_ras[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RAS_0_LO,
+		A3XX_RBBM_PERFCTR_RAS_0_HI, 19, A3XX_GRAS_PERFCOUNTER2_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RAS_1_LO,
+		A3XX_RBBM_PERFCTR_RAS_1_HI, 20, A3XX_GRAS_PERFCOUNTER3_SELECT },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_uche[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_0_LO,
+		A3XX_RBBM_PERFCTR_UCHE_0_HI, 21,
+		A3XX_UCHE_PERFCOUNTER0_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_1_LO,
+		A3XX_RBBM_PERFCTR_UCHE_1_HI, 22,
+		A3XX_UCHE_PERFCOUNTER1_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_2_LO,
+		A3XX_RBBM_PERFCTR_UCHE_2_HI, 23,
+		A3XX_UCHE_PERFCOUNTER2_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_3_LO,
+		A3XX_RBBM_PERFCTR_UCHE_3_HI, 24,
+		A3XX_UCHE_PERFCOUNTER3_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_4_LO,
+		A3XX_RBBM_PERFCTR_UCHE_4_HI, 25,
+		A3XX_UCHE_PERFCOUNTER4_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_UCHE_5_LO,
+		A3XX_RBBM_PERFCTR_UCHE_5_HI, 26,
+		A3XX_UCHE_PERFCOUNTER5_SELECT },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_tp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_0_LO,
+		A3XX_RBBM_PERFCTR_TP_0_HI, 27, A3XX_TP_PERFCOUNTER0_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_1_LO,
+		A3XX_RBBM_PERFCTR_TP_1_HI, 28, A3XX_TP_PERFCOUNTER1_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_2_LO,
+		A3XX_RBBM_PERFCTR_TP_2_HI, 29, A3XX_TP_PERFCOUNTER2_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_3_LO,
+		A3XX_RBBM_PERFCTR_TP_3_HI, 30, A3XX_TP_PERFCOUNTER3_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_4_LO,
+		A3XX_RBBM_PERFCTR_TP_4_HI, 31, A3XX_TP_PERFCOUNTER4_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_TP_5_LO,
+		A3XX_RBBM_PERFCTR_TP_5_HI, 32, A3XX_TP_PERFCOUNTER5_SELECT },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_sp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_0_LO,
+		A3XX_RBBM_PERFCTR_SP_0_HI, 33, A3XX_SP_PERFCOUNTER0_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_1_LO,
+		A3XX_RBBM_PERFCTR_SP_1_HI, 34, A3XX_SP_PERFCOUNTER1_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_2_LO,
+		A3XX_RBBM_PERFCTR_SP_2_HI, 35, A3XX_SP_PERFCOUNTER2_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_3_LO,
+		A3XX_RBBM_PERFCTR_SP_3_HI, 36, A3XX_SP_PERFCOUNTER3_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_4_LO,
+		A3XX_RBBM_PERFCTR_SP_4_HI, 37, A3XX_SP_PERFCOUNTER4_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_5_LO,
+		A3XX_RBBM_PERFCTR_SP_5_HI, 38, A3XX_SP_PERFCOUNTER5_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_6_LO,
+		A3XX_RBBM_PERFCTR_SP_6_HI, 39, A3XX_SP_PERFCOUNTER6_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_SP_7_LO,
+		A3XX_RBBM_PERFCTR_SP_7_HI, 40, A3XX_SP_PERFCOUNTER7_SELECT },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_rb[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RB_0_LO,
+		A3XX_RBBM_PERFCTR_RB_0_HI, 41, A3XX_RB_PERFCOUNTER0_SELECT },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_RB_1_LO,
+		A3XX_RBBM_PERFCTR_RB_1_HI, 42, A3XX_RB_PERFCOUNTER1_SELECT },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_pwr[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PWR_0_LO,
+		A3XX_RBBM_PERFCTR_PWR_0_HI, -1, 0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_RBBM_PERFCTR_PWR_1_LO,
+		A3XX_RBBM_PERFCTR_PWR_1_HI, -1, 0 },
+};
+
+static struct adreno_perfcount_register a3xx_perfcounters_vbif[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF_PERF_CNT0_LO,
+		A3XX_VBIF_PERF_CNT0_HI, -1, 0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF_PERF_CNT1_LO,
+		A3XX_VBIF_PERF_CNT1_HI, -1, 0 },
+};
+static struct adreno_perfcount_register a3xx_perfcounters_vbif_pwr[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF_PERF_PWR_CNT0_LO,
+		A3XX_VBIF_PERF_PWR_CNT0_HI, -1, 0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF_PERF_PWR_CNT1_LO,
+		A3XX_VBIF_PERF_PWR_CNT1_HI, -1, 0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF_PERF_PWR_CNT2_LO,
+		A3XX_VBIF_PERF_PWR_CNT2_HI, -1, 0 },
+};
+static struct adreno_perfcount_register a3xx_perfcounters_vbif2[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF2_PERF_CNT_LOW0,
+		A3XX_VBIF2_PERF_CNT_HIGH0, -1, A3XX_VBIF2_PERF_CNT_SEL0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF2_PERF_CNT_LOW1,
+		A3XX_VBIF2_PERF_CNT_HIGH1, -1, A3XX_VBIF2_PERF_CNT_SEL1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF2_PERF_CNT_LOW2,
+		A3XX_VBIF2_PERF_CNT_HIGH2, -1, A3XX_VBIF2_PERF_CNT_SEL2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A3XX_VBIF2_PERF_CNT_LOW3,
+		A3XX_VBIF2_PERF_CNT_HIGH3, -1, A3XX_VBIF2_PERF_CNT_SEL3 },
+};
+/*
+ * Place the EN register in the select field since the VBIF perf counters
+ * don't have a select register to program
+ */
+static struct adreno_perfcount_register a3xx_perfcounters_vbif2_pwr[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0,
+		0, A3XX_VBIF2_PERF_PWR_CNT_LOW0,
+		A3XX_VBIF2_PERF_PWR_CNT_HIGH0, -1,
+		A3XX_VBIF2_PERF_PWR_CNT_EN0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0,
+		0, A3XX_VBIF2_PERF_PWR_CNT_LOW1,
+		A3XX_VBIF2_PERF_PWR_CNT_HIGH1, -1,
+		A3XX_VBIF2_PERF_PWR_CNT_EN1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0,
+		0, A3XX_VBIF2_PERF_PWR_CNT_LOW2,
+		A3XX_VBIF2_PERF_PWR_CNT_HIGH2, -1,
+		A3XX_VBIF2_PERF_PWR_CNT_EN2 },
+};
+
+#define A3XX_PERFCOUNTER_GROUP(offset, name) \
+	ADRENO_PERFCOUNTER_GROUP(a3xx, offset, name)
+
+#define A3XX_PERFCOUNTER_GROUP_FLAGS(offset, name, flags) \
+	ADRENO_PERFCOUNTER_GROUP_FLAGS(a3xx, offset, name, flags)
+
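+/*
+ * A3XX_PERFCOUNTER_GROUP(CP, cp) is expected to expand to an entry at
+ * index KGSL_PERFCOUNTER_GROUP_CP pointing at a3xx_perfcounters_cp and
+ * its size (see ADRENO_PERFCOUNTER_GROUP in adreno_perfcounter.h).
+ */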
+static struct adreno_perfcount_group a3xx_perfcounter_groups[] = {
+	A3XX_PERFCOUNTER_GROUP(CP, cp),
+	A3XX_PERFCOUNTER_GROUP(RBBM, rbbm),
+	A3XX_PERFCOUNTER_GROUP(PC, pc),
+	A3XX_PERFCOUNTER_GROUP(VFD, vfd),
+	A3XX_PERFCOUNTER_GROUP(HLSQ, hlsq),
+	A3XX_PERFCOUNTER_GROUP(VPC, vpc),
+	A3XX_PERFCOUNTER_GROUP(TSE, tse),
+	A3XX_PERFCOUNTER_GROUP(RAS, ras),
+	A3XX_PERFCOUNTER_GROUP(UCHE, uche),
+	A3XX_PERFCOUNTER_GROUP(TP, tp),
+	A3XX_PERFCOUNTER_GROUP(SP, sp),
+	A3XX_PERFCOUNTER_GROUP(RB, rb),
+	A3XX_PERFCOUNTER_GROUP_FLAGS(PWR, pwr,
+		ADRENO_PERFCOUNTER_GROUP_FIXED),
+	A3XX_PERFCOUNTER_GROUP(VBIF, vbif),
+	A3XX_PERFCOUNTER_GROUP_FLAGS(VBIF_PWR, vbif_pwr,
+		ADRENO_PERFCOUNTER_GROUP_FIXED),
+};
+
+static struct adreno_perfcounters a3xx_perfcounters = {
+	a3xx_perfcounter_groups,
+	ARRAY_SIZE(a3xx_perfcounter_groups),
+};
+
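+/*
+ * Perf counters sampled by the fault tolerance logic; the countables
+ * (ALU active cycles, icache misses, control-flow instructions, input
+ * primitives) presumably characterize the workload around a fault.
+ */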
+static struct adreno_ft_perf_counters a3xx_ft_perf_counters[] = {
+	{KGSL_PERFCOUNTER_GROUP_SP, SP_ALU_ACTIVE_CYCLES},
+	{KGSL_PERFCOUNTER_GROUP_SP, SP0_ICL1_MISSES},
+	{KGSL_PERFCOUNTER_GROUP_SP, SP_FS_CFLOW_INSTRUCTIONS},
+	{KGSL_PERFCOUNTER_GROUP_TSE, TSE_INPUT_PRIM_NUM},
+};
+
+static void a3xx_perfcounter_init(struct adreno_device *adreno_dev)
+{
+	struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev);
+
+	/* The SP[3] counter is broken on A330, so mark it as broken there */
+	if (adreno_is_a330(adreno_dev))
+		a3xx_perfcounters_sp[3].countable = KGSL_PERFCOUNTER_BROKEN;
+
+	if (counters &&
+		(adreno_is_a306(adreno_dev) || adreno_is_a304(adreno_dev) ||
+		adreno_is_a306a(adreno_dev))) {
+		counters->groups[KGSL_PERFCOUNTER_GROUP_VBIF].regs =
+			a3xx_perfcounters_vbif2;
+		counters->groups[KGSL_PERFCOUNTER_GROUP_VBIF_PWR].regs =
+			a3xx_perfcounters_vbif2_pwr;
+	}
+
+	/*
+	 * Enable the GPU busy count counter. This is a fixed counter on
+	 * A3XX so we don't need to bother checking the return value
+	 */
+	adreno_perfcounter_get(adreno_dev, KGSL_PERFCOUNTER_GROUP_PWR, 1,
+		NULL, NULL, PERFCOUNTER_FLAG_KERNEL);
+}
+
+static void a3xx_perfcounter_close(struct adreno_device *adreno_dev)
+{
+	adreno_perfcounter_put(adreno_dev, KGSL_PERFCOUNTER_GROUP_PWR, 1,
+		PERFCOUNTER_FLAG_KERNEL);
+}
+
+/**
+ * a3xx_protect_init() - Initializes register protection on a3xx
+ * @adreno_dev: Pointer to the device structure
+ * Performs register writes to enable protected access to sensitive
+ * registers
+ */
+static void a3xx_protect_init(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int index = 0;
+	struct kgsl_protected_registers *iommu_regs;
+
+	/* enable access protection to privileged registers */
+	kgsl_regwrite(device, A3XX_CP_PROTECT_CTRL, 0x00000007);
+
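+	/*
+	 * Each range below is assumed to cover 2^count registers starting at
+	 * the given offset, e.g. (0x20, 2) protects 0x20 through 0x23.
+	 */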
+	/* RBBM registers */
+	adreno_set_protected_registers(adreno_dev, &index, 0x18, 0);
+	adreno_set_protected_registers(adreno_dev, &index, 0x20, 2);
+	adreno_set_protected_registers(adreno_dev, &index, 0x33, 0);
+	adreno_set_protected_registers(adreno_dev, &index, 0x42, 0);
+	adreno_set_protected_registers(adreno_dev, &index, 0x50, 4);
+	adreno_set_protected_registers(adreno_dev, &index, 0x63, 0);
+	adreno_set_protected_registers(adreno_dev, &index, 0x100, 4);
+
+	/* CP registers */
+	adreno_set_protected_registers(adreno_dev, &index, 0x1C0, 5);
+	adreno_set_protected_registers(adreno_dev, &index, 0x1EC, 1);
+	adreno_set_protected_registers(adreno_dev, &index, 0x1F6, 1);
+	adreno_set_protected_registers(adreno_dev, &index, 0x1F8, 2);
+	adreno_set_protected_registers(adreno_dev, &index, 0x45E, 2);
+	adreno_set_protected_registers(adreno_dev, &index, 0x460, 4);
+
+	/* RB registers */
+	adreno_set_protected_registers(adreno_dev, &index, 0xCC0, 0);
+
+	/* VBIF registers */
+	adreno_set_protected_registers(adreno_dev, &index, 0x3000, 6);
+
+	/* SMMU registers */
+	iommu_regs = kgsl_mmu_get_prot_regs(&device->mmu);
+	if (iommu_regs)
+		adreno_set_protected_registers(adreno_dev, &index,
+				iommu_regs->base, iommu_regs->range);
+}
+
+static void a3xx_start(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+
+	adreno_vbif_start(adreno_dev, a3xx_vbif_platforms,
+			ARRAY_SIZE(a3xx_vbif_platforms));
+
+	/* Make all blocks contribute to the GPU BUSY perf counter */
+	kgsl_regwrite(device, A3XX_RBBM_GPU_BUSY_MASKED, 0xFFFFFFFF);
+
+	/* Tune the hysteresis counters for SP and CP idle detection */
+	kgsl_regwrite(device, A3XX_RBBM_SP_HYST_CNT, 0x10);
+	kgsl_regwrite(device, A3XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);
+
+	/*
+	 * Enable the RBBM error reporting bits.  This lets us get
+	 * useful information on failure
+	 */
+
+	kgsl_regwrite(device, A3XX_RBBM_AHB_CTL0, 0x00000001);
+
+	/* Enable AHB error reporting */
+	kgsl_regwrite(device, A3XX_RBBM_AHB_CTL1, 0xA6FFFFFF);
+
+	/* Turn on the power counters */
+	kgsl_regwrite(device, A3XX_RBBM_RBBM_CTL, 0x00030000);
+
+	/*
+	 * Turn on hang detection - this spews a lot of useful information
+	 * into the RBBM registers on a hang
+	 */
+	if (adreno_is_a330v2(adreno_dev)) {
+		set_bit(ADRENO_DEVICE_HANG_INTR, &adreno_dev->priv);
+		gpudev->irq->mask |= (1 << A3XX_INT_MISC_HANG_DETECT);
+		kgsl_regwrite(device, A3XX_RBBM_INTERFACE_HANG_INT_CTL,
+				(1 << 31) | 0xFFFF);
+	} else
+		kgsl_regwrite(device, A3XX_RBBM_INTERFACE_HANG_INT_CTL,
+				(1 << 16) | 0xFFF);
+
+	/* Enable 64-byte cacheline size. HW Default is 32-byte (0x000000E0). */
+	kgsl_regwrite(device, A3XX_UCHE_CACHE_MODE_CONTROL_REG, 0x00000001);
+
+	/* Enable VFD to access most of the UCHE (7 ways out of 8) */
+	kgsl_regwrite(device, A3XX_UCHE_CACHE_WAYS_VFD, 0x07);
+
+	/* Enable Clock gating */
+	kgsl_regwrite(device, A3XX_RBBM_CLOCK_CTL,
+		adreno_a3xx_rbbm_clock_ctl_default(adreno_dev));
+
+	if (adreno_is_a330v2(adreno_dev))
+		kgsl_regwrite(device, A3XX_RBBM_GPR0_CTL,
+			A330v2_RBBM_GPR0_CTL_DEFAULT);
+	else if (adreno_is_a330(adreno_dev))
+		kgsl_regwrite(device, A3XX_RBBM_GPR0_CTL,
+			A330_RBBM_GPR0_CTL_DEFAULT);
+	else if (adreno_is_a310(adreno_dev))
+		kgsl_regwrite(device, A3XX_RBBM_GPR0_CTL,
+			A310_RBBM_GPR0_CTL_DEFAULT);
+
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_USES_OCMEM))
+		kgsl_regwrite(device, A3XX_RB_GMEM_BASE_ADDR,
+			(unsigned int)(adreno_dev->gmem_base >> 14));
+
+	/* Turn on protection */
+	a3xx_protect_init(adreno_dev);
+
+	/* Turn on performance counters */
+	kgsl_regwrite(device, A3XX_RBBM_PERFCTR_CTL, 0x01);
+
+	kgsl_regwrite(device, A3XX_CP_DEBUG, A3XX_CP_DEBUG_DEFAULT);
+
+	/* CP ROQ queue sizes (bytes) - RB:16, ST:16, IB1:32, IB2:64 */
+	if (adreno_is_a305b(adreno_dev) ||
+			adreno_is_a310(adreno_dev) ||
+			adreno_is_a330(adreno_dev))
+		kgsl_regwrite(device, A3XX_CP_QUEUE_THRESHOLDS, 0x003E2008);
+	else
+		kgsl_regwrite(device, A3XX_CP_QUEUE_THRESHOLDS, 0x000E0602);
+}
+
+static struct adreno_coresight_register a3xx_coresight_registers[] = {
+	{ A3XX_RBBM_DEBUG_BUS_CTL, 0x0001093F },
+	{ A3XX_RBBM_EXT_TRACE_STOP_CNT, 0x00017fff },
+	{ A3XX_RBBM_EXT_TRACE_START_CNT, 0x0001000f },
+	{ A3XX_RBBM_EXT_TRACE_PERIOD_CNT, 0x0001ffff },
+	{ A3XX_RBBM_EXT_TRACE_CMD, 0x00000001 },
+	{ A3XX_RBBM_EXT_TRACE_BUS_CTL, 0x89100010 },
+	{ A3XX_RBBM_DEBUG_BUS_STB_CTL0, 0x00000000 },
+	{ A3XX_RBBM_DEBUG_BUS_STB_CTL1, 0xFFFFFFFE },
+	{ A3XX_RBBM_INT_TRACE_BUS_CTL, 0x00201111 },
+};
+
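+/*
+ * Each ADRENO_CORESIGHT_ATTR below presumably creates a sysfs attribute
+ * exposing the matching entry in a3xx_coresight_registers through the
+ * coresight interface.
+ */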
+static ADRENO_CORESIGHT_ATTR(config_debug_bus,
+	&a3xx_coresight_registers[0]);
+static ADRENO_CORESIGHT_ATTR(config_trace_stop_cnt,
+	&a3xx_coresight_registers[1]);
+static ADRENO_CORESIGHT_ATTR(config_trace_start_cnt,
+	&a3xx_coresight_registers[2]);
+static ADRENO_CORESIGHT_ATTR(config_trace_period_cnt,
+	&a3xx_coresight_registers[3]);
+static ADRENO_CORESIGHT_ATTR(config_trace_cmd,
+	&a3xx_coresight_registers[4]);
+static ADRENO_CORESIGHT_ATTR(config_trace_bus_ctl,
+	&a3xx_coresight_registers[5]);
+
+static struct attribute *a3xx_coresight_attrs[] = {
+	&coresight_attr_config_debug_bus.attr.attr,
+	&coresight_attr_config_trace_start_cnt.attr.attr,
+	&coresight_attr_config_trace_stop_cnt.attr.attr,
+	&coresight_attr_config_trace_period_cnt.attr.attr,
+	&coresight_attr_config_trace_cmd.attr.attr,
+	&coresight_attr_config_trace_bus_ctl.attr.attr,
+	NULL,
+};
+
+static const struct attribute_group a3xx_coresight_group = {
+	.attrs = a3xx_coresight_attrs,
+};
+
+static const struct attribute_group *a3xx_coresight_groups[] = {
+	&a3xx_coresight_group,
+	NULL,
+};
+
+static struct adreno_coresight a3xx_coresight = {
+	.registers = a3xx_coresight_registers,
+	.count = ARRAY_SIZE(a3xx_coresight_registers),
+	.groups = a3xx_coresight_groups,
+};
+
+static unsigned int a3xx_int_bits[ADRENO_INT_BITS_MAX] = {
+	ADRENO_INT_DEFINE(ADRENO_INT_RBBM_AHB_ERROR, A3XX_INT_RBBM_AHB_ERROR),
+};
+
+/* Register offset defines for A3XX */
+static unsigned int a3xx_register_offsets[ADRENO_REG_REGISTER_MAX] = {
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_RAM_WADDR, A3XX_CP_ME_RAM_WADDR),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_RAM_DATA, A3XX_CP_ME_RAM_DATA),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_PFP_UCODE_DATA, A3XX_CP_PFP_UCODE_DATA),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_PFP_UCODE_ADDR, A3XX_CP_PFP_UCODE_ADDR),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_WFI_PEND_CTR, A3XX_CP_WFI_PEND_CTR),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE, A3XX_CP_RB_BASE),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE_HI, ADRENO_REG_SKIP),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR, A3XX_CP_RB_RPTR),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_WPTR, A3XX_CP_RB_WPTR),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_CNTL, A3XX_CP_CNTL),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_CNTL, A3XX_CP_ME_CNTL),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_CNTL, A3XX_CP_RB_CNTL),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE, A3XX_CP_IB1_BASE),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE_HI, ADRENO_REG_SKIP),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BUFSZ, A3XX_CP_IB1_BUFSZ),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE, A3XX_CP_IB2_BASE),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE_HI, ADRENO_REG_SKIP),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BUFSZ, A3XX_CP_IB2_BUFSZ),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_TIMESTAMP, A3XX_CP_SCRATCH_REG0),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_SCRATCH_REG6, A3XX_CP_SCRATCH_REG6),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_SCRATCH_REG7, A3XX_CP_SCRATCH_REG7),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_RAM_RADDR, A3XX_CP_ME_RAM_RADDR),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_ROQ_ADDR, A3XX_CP_ROQ_ADDR),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_ROQ_DATA, A3XX_CP_ROQ_DATA),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_MERCIU_ADDR, A3XX_CP_MERCIU_ADDR),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_MERCIU_DATA, A3XX_CP_MERCIU_DATA),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_MERCIU_DATA2, A3XX_CP_MERCIU_DATA2),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_MEQ_ADDR, A3XX_CP_MEQ_ADDR),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_MEQ_DATA, A3XX_CP_MEQ_DATA),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_PROTECT_REG_0, A3XX_CP_PROTECT_REG_0),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_STATUS, A3XX_RBBM_STATUS),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_CTL, A3XX_RBBM_PERFCTR_CTL),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_CMD0,
+					A3XX_RBBM_PERFCTR_LOAD_CMD0),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_CMD1,
+					A3XX_RBBM_PERFCTR_LOAD_CMD1),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_PWR_1_LO,
+					A3XX_RBBM_PERFCTR_PWR_1_LO),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_0_MASK, A3XX_RBBM_INT_0_MASK),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_0_STATUS, A3XX_RBBM_INT_0_STATUS),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_CLEAR_CMD,
+				A3XX_RBBM_INT_CLEAR_CMD),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_CLOCK_CTL, A3XX_RBBM_CLOCK_CTL),
+	ADRENO_REG_DEFINE(ADRENO_REG_VPC_DEBUG_RAM_SEL,
+				A3XX_VPC_VPC_DEBUG_RAM_SEL),
+	ADRENO_REG_DEFINE(ADRENO_REG_VPC_DEBUG_RAM_READ,
+				A3XX_VPC_VPC_DEBUG_RAM_READ),
+	ADRENO_REG_DEFINE(ADRENO_REG_PA_SC_AA_CONFIG, A3XX_PA_SC_AA_CONFIG),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PM_OVERRIDE2, A3XX_RBBM_PM_OVERRIDE2),
+	ADRENO_REG_DEFINE(ADRENO_REG_SQ_GPR_MANAGEMENT, A3XX_SQ_GPR_MANAGEMENT),
+	ADRENO_REG_DEFINE(ADRENO_REG_SQ_INST_STORE_MANAGEMENT,
+				A3XX_SQ_INST_STORE_MANAGEMENT),
+	ADRENO_REG_DEFINE(ADRENO_REG_TP0_CHICKEN, A3XX_TP0_CHICKEN),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_RBBM_CTL, A3XX_RBBM_RBBM_CTL),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SW_RESET_CMD, A3XX_RBBM_SW_RESET_CMD),
+	ADRENO_REG_DEFINE(ADRENO_REG_UCHE_INVALIDATE0,
+			A3XX_UCHE_CACHE_INVALIDATE0_REG),
+	ADRENO_REG_DEFINE(ADRENO_REG_UCHE_INVALIDATE1,
+			A3XX_UCHE_CACHE_INVALIDATE1_REG),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_LO,
+				A3XX_RBBM_PERFCTR_LOAD_VALUE_LO),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_HI,
+				A3XX_RBBM_PERFCTR_LOAD_VALUE_HI),
+	ADRENO_REG_DEFINE(ADRENO_REG_VBIF_XIN_HALT_CTRL0,
+				A3XX_VBIF_XIN_HALT_CTRL0),
+	ADRENO_REG_DEFINE(ADRENO_REG_VBIF_XIN_HALT_CTRL1,
+				A3XX_VBIF_XIN_HALT_CTRL1),
+};
+
+static const struct adreno_reg_offsets a3xx_reg_offsets = {
+	.offsets = a3xx_register_offsets,
+	.offset_0 = ADRENO_REG_REGISTER_MAX,
+};
+
+/*
+ * Default sizes of the sections dumped in a snapshot; these values
+ * may change after initialization based on the specific core
+ */
+static struct adreno_snapshot_sizes a3xx_snap_sizes = {
+	.cp_pfp = 0x14,
+	.vpc_mem = 512,
+	.cp_meq = 16,
+	.shader_mem = 0x4000,
+	.cp_merciu = 0,
+	.roq = 128,
+};
+
+static struct adreno_snapshot_data a3xx_snapshot_data = {
+	.sect_sizes = &a3xx_snap_sizes,
+};
+
+static int _load_firmware(struct kgsl_device *device, const char *fwfile,
+			  void **buf, int *len)
+{
+	const struct firmware *fw = NULL;
+	int ret;
+
+	ret = request_firmware(&fw, fwfile, device->dev);
+
+	if (ret) {
+		KGSL_DRV_ERR(device, "request_firmware(%s) failed: %d\n",
+			     fwfile, ret);
+		return ret;
+	}
+
+	if (fw)
+		*buf = kmalloc(fw->size, GFP_KERNEL);
+	else
+		return -EINVAL;
+
+	if (*buf) {
+		memcpy(*buf, fw->data, fw->size);
+		*len = fw->size;
+	}
+
+	release_firmware(fw);
+	return (*buf != NULL) ? 0 : -ENOMEM;
+}
+
+int a3xx_microcode_read(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (adreno_dev->pm4_fw == NULL) {
+		int len;
+		void *ptr;
+
+		int ret = _load_firmware(device,
+			adreno_dev->gpucore->pm4fw_name, &ptr, &len);
+
+		if (ret) {
+			KGSL_DRV_FATAL(device, "Failed to read pm4 ucode %s\n",
+					   adreno_dev->gpucore->pm4fw_name);
+			return ret;
+		}
+
+		/* A valid PM4 image is 3*N instruction dwords plus 1 version dword */
+		if (len % ((sizeof(uint32_t) * 3)) != sizeof(uint32_t)) {
+			KGSL_DRV_ERR(device, "Bad pm4 microcode size: %d\n",
+				len);
+			kfree(ptr);
+			return -ENOMEM;
+		}
+
+		adreno_dev->pm4_fw_size = len / sizeof(uint32_t);
+		adreno_dev->pm4_fw = ptr;
+		adreno_dev->pm4_fw_version = adreno_dev->pm4_fw[1];
+	}
+
+	if (adreno_dev->pfp_fw == NULL) {
+		int len;
+		void *ptr;
+
+		int ret = _load_firmware(device,
+			adreno_dev->gpucore->pfpfw_name, &ptr, &len);
+		if (ret) {
+			KGSL_DRV_FATAL(device, "Failed to read pfp ucode %s\n",
+					   adreno_dev->gpucore->pfpfw_name);
+			return ret;
+		}
+
+		/* PFP size should be dword aligned */
+		if (len % sizeof(uint32_t) != 0) {
+			KGSL_DRV_ERR(device, "Bad PFP microcode size: %d\n",
+						len);
+			kfree(ptr);
+			return -ENOMEM;
+		}
+
+		adreno_dev->pfp_fw_size = len / sizeof(uint32_t);
+		adreno_dev->pfp_fw = ptr;
+		adreno_dev->pfp_fw_version = adreno_dev->pfp_fw[5];
+	}
+
+	return 0;
+}
+
+/**
+ * load_pm4_ucode() - Load pm4 ucode
+ * @adreno_dev: Pointer to an adreno device
+ * @start: Starting index in pm4 ucode to load
+ * @end: Ending index of pm4 ucode to load
+ * @addr: Address to load the pm4 ucode
+ *
+ * Load the pm4 ucode from @start at @addr.
+ */
+static inline void load_pm4_ucode(struct adreno_device *adreno_dev,
+			unsigned int start, unsigned int end, unsigned int addr)
+{
+	int i;
+
+	adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_RAM_WADDR, addr);
+	for (i = start; i < end; i++)
+		adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_RAM_DATA,
+					adreno_dev->pm4_fw[i]);
+}
+
+/**
+ * load_pfp_ucode() - Load pfp ucode
+ * @adreno_dev: Pointer to an adreno device
+ * @start: Starting index in pfp ucode to load
+ * @end: Ending index of pfp ucode to load
+ * @addr: Address to load the pfp ucode
+ *
+ * Load the pfp ucode from @start at @addr.
+ */
+static inline void load_pfp_ucode(struct adreno_device *adreno_dev,
+			unsigned int start, unsigned int end, unsigned int addr)
+{
+	int i;
+
+	adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_ADDR, addr);
+	for (i = start; i < end; i++)
+		adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_DATA,
+						adreno_dev->pfp_fw[i]);
+}
+
+/**
+ * _ringbuffer_bootstrap_ucode() - Bootstrap GPU Ucode
+ * @adreno_dev: Pointer to an adreno device
+ * @rb: The ringbuffer to bootstrap the code into
+ * @load_jt: If non-zero, only load the jump tables
+ *
+ * Bootstrap ucode for the GPU:
+ * load_jt == 0, bootstrap the full microcode
+ * load_jt == 1, bootstrap only the jump tables of the microcode
+ *
+ * A bootstrap packet is laid out as follows:
+ * a type3 CP_BOOTSTRAP_UCODE packet header
+ * PFP size to bootstrap
+ * PFP addr to write the PFP data to
+ * PM4 size to bootstrap
+ * PM4 addr to write the PM4 data to
+ * PFP dwords from the microcode to bootstrap
+ * PM4 dwords from the microcode to bootstrap
+ */
+static int _ringbuffer_bootstrap_ucode(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb, unsigned int load_jt)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int *cmds, bootstrap_size, rb_size;
+	int i = 0;
+	int ret;
+	unsigned int pm4_size, pm4_idx, pm4_addr, pfp_size, pfp_idx, pfp_addr;
+
+	/* Only bootstrap jump tables of ucode */
+	if (load_jt) {
+		pm4_idx = adreno_dev->gpucore->pm4_jt_idx;
+		pm4_addr = adreno_dev->gpucore->pm4_jt_addr;
+		pfp_idx = adreno_dev->gpucore->pfp_jt_idx;
+		pfp_addr = adreno_dev->gpucore->pfp_jt_addr;
+	} else {
+		/* Bootstrap full ucode */
+		pm4_idx = 1;
+		pm4_addr = 0;
+		pfp_idx = 1;
+		pfp_addr = 0;
+	}
+
+	pm4_size = (adreno_dev->pm4_fw_size - pm4_idx);
+	pfp_size = (adreno_dev->pfp_fw_size - pfp_idx);
+
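+	/* 5 = type3 packet header + the four size/addr parameter dwords */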
+	bootstrap_size = (pm4_size + pfp_size + 5);
+
+	/*
+	 * Overwrite the first entry in the jump table with the special
+	 * bootstrap opcode
+	 */
+
+	if (adreno_is_a4xx(adreno_dev)) {
+		adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_ADDR,
+			0x400);
+		adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_DATA,
+			 0x6f0009);
+		/*
+		 * The support packets (the RMW and INTERRUPT) that are sent
+		 * after the bootstrap packet should not be included in the size
+		 * of the bootstrap packet but we do need to reserve enough
+		 * space for those too
+		 */
+		rb_size = bootstrap_size + 6;
+	} else {
+		adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_ADDR,
+			0x200);
+		adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_DATA,
+			 0x6f0005);
+		rb_size = bootstrap_size;
+	}
+
+	/* clear ME_HALT to start micro engine */
+	adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_CNTL, 0);
+
+	cmds = adreno_ringbuffer_allocspace(rb, rb_size);
+	if (IS_ERR(cmds))
+		return PTR_ERR(cmds);
+	if (cmds == NULL)
+		return -ENOSPC;
+
+	/* Construct the packet that bootstraps the ucode */
+	*cmds++ = cp_type3_packet(CP_BOOTSTRAP_UCODE, (bootstrap_size - 1));
+	*cmds++ = pfp_size;
+	*cmds++ = pfp_addr;
+	*cmds++ = pm4_size;
+	*cmds++ = pm4_addr;
+
+	/*
+	 * Theory of operation:
+	 *
+	 * On A4xx, we cannot have the PFP executing instructions while its
+	 * instruction RAM is loading. We load the PFP's instruction RAM
+	 * using type-0 writes from the ME.
+	 *
+	 * To make sure the PFP is not fetching instructions at the same
+	 * time, we put it in a one-instruction loop:
+	 * mvc (ME), (ringbuffer)
+	 * which executes repeatedly until all of the data has been moved
+	 * from the ring buffer to the ME.
+	 */
+	if (adreno_is_a4xx(adreno_dev)) {
+		for (i = pm4_idx; i < adreno_dev->pm4_fw_size; i++)
+			*cmds++ = adreno_dev->pm4_fw[i];
+		for (i = pfp_idx; i < adreno_dev->pfp_fw_size; i++)
+			*cmds++ = adreno_dev->pfp_fw[i];
+
+		*cmds++ = cp_type3_packet(CP_REG_RMW, 3);
+		*cmds++ = 0x20000000 + A4XX_CP_RB_WPTR;
+		*cmds++ = 0xffffffff;
+		*cmds++ = 0x00000002;
+		*cmds++ = cp_type3_packet(CP_INTERRUPT, 1);
+		*cmds++ = 0;
+
+		rb->_wptr = rb->_wptr - 2;
+		adreno_ringbuffer_submit(rb, NULL);
+		rb->_wptr = rb->_wptr + 2;
+	} else {
+		for (i = pfp_idx; i < adreno_dev->pfp_fw_size; i++)
+			*cmds++ = adreno_dev->pfp_fw[i];
+		for (i = pm4_idx; i < adreno_dev->pm4_fw_size; i++)
+			*cmds++ = adreno_dev->pm4_fw[i];
+		adreno_ringbuffer_submit(rb, NULL);
+	}
+
+	/* idle device to validate bootstrap */
+	ret = adreno_spin_idle(adreno_dev, 2000);
+
+	if (ret) {
+		KGSL_DRV_ERR(device, "microcode bootstrap failed to idle\n");
+		kgsl_device_snapshot(device, NULL);
+	}
+
+	/* Clear the chicken bit for speed up on A430 and its derivatives */
+	if (!adreno_is_a420(adreno_dev))
+		kgsl_regwrite(device, A4XX_CP_DEBUG,
+					A4XX_CP_DEBUG_DEFAULT & ~(1 << 14));
+
+	return ret;
+}
+
+int a3xx_microcode_load(struct adreno_device *adreno_dev,
+				unsigned int start_type)
+{
+	int status;
+	struct adreno_ringbuffer *rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev);
+
+	if (start_type == ADRENO_START_COLD) {
+		/* If bootstrapping is supported, use it to load the ucode */
+		if (adreno_bootstrap_ucode(adreno_dev)) {
+
+			/*
+			 * Load the first pm4_bstrp_size + pfp_bstrp_size
+			 * microcode dwords using AHB writes. This small piece
+			 * of microcode contains the dispatcher and booter and
+			 * enables the CP to understand the CP_BOOTSTRAP_UCODE
+			 * packet issued in _ringbuffer_bootstrap_ucode(),
+			 * which then loads the rest of the microcode.
+			 */
+
+			load_pm4_ucode(adreno_dev, 1,
+				adreno_dev->gpucore->pm4_bstrp_size+1, 0);
+
+			load_pfp_ucode(adreno_dev, 1,
+				adreno_dev->gpucore->pfp_bstrp_size+1, 0);
+
+			/* Bootstrap rest of the ucode here */
+			status = _ringbuffer_bootstrap_ucode(adreno_dev, rb, 0);
+			if (status != 0)
+				return status;
+
+		} else {
+			/* load the CP ucode using AHB writes */
+			load_pm4_ucode(adreno_dev, 1, adreno_dev->pm4_fw_size,
+				0);
+
+			/* load the prefetch parser ucode using AHB writes */
+			load_pfp_ucode(adreno_dev, 1, adreno_dev->pfp_fw_size,
+				0);
+		}
+	} else if (start_type == ADRENO_START_WARM) {
+		/* If bootstrapping is supported, use it to load the jump tables */
+		if (adreno_bootstrap_ucode(adreno_dev)) {
+			status = _ringbuffer_bootstrap_ucode(adreno_dev, rb, 1);
+			if (status != 0)
+				return status;
+
+		} else {
+			/* load the CP jump tables using AHB writes */
+			load_pm4_ucode(adreno_dev,
+				adreno_dev->gpucore->pm4_jt_idx,
+				adreno_dev->pm4_fw_size,
+				adreno_dev->gpucore->pm4_jt_addr);
+
+			/*
+			 * load the prefetch parser jump tables using AHB writes
+			 */
+			load_pfp_ucode(adreno_dev,
+				adreno_dev->gpucore->pfp_jt_idx,
+				adreno_dev->pfp_fw_size,
+				adreno_dev->gpucore->pfp_jt_addr);
+		}
+	} else
+		return -EINVAL;
+
+	return 0;
+}
+
+struct adreno_gpudev adreno_a3xx_gpudev = {
+	.reg_offsets = &a3xx_reg_offsets,
+	.int_bits = a3xx_int_bits,
+	.ft_perf_counters = a3xx_ft_perf_counters,
+	.ft_perf_counters_count = ARRAY_SIZE(a3xx_ft_perf_counters),
+	.perfcounters = &a3xx_perfcounters,
+	.irq = &a3xx_irq,
+	.irq_trace = trace_kgsl_a3xx_irq_status,
+	.snapshot_data = &a3xx_snapshot_data,
+	.num_prio_levels = 1,
+	.vbif_xin_halt_ctrl0_mask = A3XX_VBIF_XIN_HALT_CTRL0_MASK,
+	.platform_setup = a3xx_platform_setup,
+	.rb_start = a3xx_rb_start,
+	.init = a3xx_init,
+	.microcode_read = a3xx_microcode_read,
+	.perfcounter_init = a3xx_perfcounter_init,
+	.perfcounter_close = a3xx_perfcounter_close,
+	.start = a3xx_start,
+	.snapshot = a3xx_snapshot,
+	.coresight = &a3xx_coresight,
+};
diff --git a/drivers/gpu/msm/adreno_a3xx.h b/drivers/gpu/msm/adreno_a3xx.h
new file mode 100644
index 0000000..11596b8
--- /dev/null
+++ b/drivers/gpu/msm/adreno_a3xx.h
@@ -0,0 +1,58 @@
+/* Copyright (c) 2013-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef __A3XX_H
+#define __A3XX_H
+
+#include "a3xx_reg.h"
+
+#define A3XX_IRQ_FLAGS \
+	{ BIT(A3XX_INT_RBBM_GPU_IDLE), "RBBM_GPU_IDLE" }, \
+	{ BIT(A3XX_INT_RBBM_AHB_ERROR), "RBBM_AHB_ERR" }, \
+	{ BIT(A3XX_INT_RBBM_REG_TIMEOUT), "RBBM_REG_TIMEOUT" }, \
+	{ BIT(A3XX_INT_RBBM_ME_MS_TIMEOUT), "RBBM_ME_MS_TIMEOUT" }, \
+	{ BIT(A3XX_INT_RBBM_PFP_MS_TIMEOUT), "RBBM_PFP_MS_TIMEOUT" }, \
+	{ BIT(A3XX_INT_RBBM_ATB_BUS_OVERFLOW), "RBBM_ATB_BUS_OVERFLOW" }, \
+	{ BIT(A3XX_INT_VFD_ERROR), "RBBM_VFD_ERROR" }, \
+	{ BIT(A3XX_INT_CP_SW_INT), "CP_SW" }, \
+	{ BIT(A3XX_INT_CP_T0_PACKET_IN_IB), "CP_T0_PACKET_IN_IB" }, \
+	{ BIT(A3XX_INT_CP_OPCODE_ERROR), "CP_OPCODE_ERROR" }, \
+	{ BIT(A3XX_INT_CP_RESERVED_BIT_ERROR), "CP_RESERVED_BIT_ERROR" }, \
+	{ BIT(A3XX_INT_CP_HW_FAULT), "CP_HW_FAULT" }, \
+	{ BIT(A3XX_INT_CP_DMA), "CP_DMA" }, \
+	{ BIT(A3XX_INT_CP_IB2_INT), "CP_IB2_INT" }, \
+	{ BIT(A3XX_INT_CP_IB1_INT), "CP_IB1_INT" }, \
+	{ BIT(A3XX_INT_CP_RB_INT), "CP_RB_INT" }, \
+	{ BIT(A3XX_INT_CP_REG_PROTECT_FAULT), "CP_REG_PROTECT_FAULT" }, \
+	{ BIT(A3XX_INT_CP_RB_DONE_TS), "CP_RB_DONE_TS" }, \
+	{ BIT(A3XX_INT_CP_VS_DONE_TS), "CP_VS_DONE_TS" }, \
+	{ BIT(A3XX_INT_CP_PS_DONE_TS), "CP_PS_DONE_TS" }, \
+	{ BIT(A3XX_INT_CACHE_FLUSH_TS), "CACHE_FLUSH_TS" }, \
+	{ BIT(A3XX_INT_CP_AHB_ERROR_HALT), "CP_AHB_ERROR_HALT" }, \
+	{ BIT(A3XX_INT_MISC_HANG_DETECT), "MISC_HANG_DETECT" }, \
+	{ BIT(A3XX_INT_UCHE_OOB_ACCESS), "UCHE_OOB_ACCESS" }
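+
+/*
+ * A3XX_IRQ_FLAGS is presumably consumed by the kgsl_a3xx_irq_status
+ * tracepoint to translate RBBM_INT_0_STATUS bits into readable names.
+ */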
+
+unsigned int a3xx_irq_pending(struct adreno_device *adreno_dev);
+
+int a3xx_microcode_read(struct adreno_device *adreno_dev);
+int a3xx_microcode_load(struct adreno_device *adreno_dev,
+				unsigned int start_type);
+int a3xx_perfcounter_enable(struct adreno_device *adreno_dev,
+	unsigned int group, unsigned int counter, unsigned int countable);
+uint64_t a3xx_perfcounter_read(struct adreno_device *adreno_dev,
+	unsigned int group, unsigned int counter);
+
+void a3xx_a4xx_err_callback(struct adreno_device *adreno_dev, int bit);
+
+void a3xx_snapshot(struct adreno_device *adreno_dev,
+		struct kgsl_snapshot *snapshot);
+#endif /* __A3XX_H */
diff --git a/drivers/gpu/msm/adreno_a3xx_snapshot.c b/drivers/gpu/msm/adreno_a3xx_snapshot.c
new file mode 100644
index 0000000..240dcdb
--- /dev/null
+++ b/drivers/gpu/msm/adreno_a3xx_snapshot.c
@@ -0,0 +1,371 @@
+/* Copyright (c) 2012-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/io.h>
+#include "kgsl.h"
+#include "adreno.h"
+#include "kgsl_snapshot.h"
+#include "a3xx_reg.h"
+#include "adreno_snapshot.h"
+#include "adreno_a3xx.h"
+
+/*
+ * Set of registers to dump for A3XX on snapshot.
+ * Registers in pairs - first value is the start offset, second
+ * is the stop offset (inclusive)
+ */
+
+static const unsigned int a3xx_registers[] = {
+	0x0000, 0x0002, 0x0010, 0x0012, 0x0018, 0x0018, 0x0020, 0x0027,
+	0x0029, 0x002b, 0x002e, 0x0033, 0x0040, 0x0042, 0x0050, 0x005c,
+	0x0060, 0x006c, 0x0080, 0x0082, 0x0084, 0x0088, 0x0090, 0x00e5,
+	0x00ea, 0x00ed, 0x0100, 0x0100, 0x0110, 0x0123, 0x01c0, 0x01c1,
+	0x01c3, 0x01c5, 0x01c7, 0x01c7, 0x01d5, 0x01d9, 0x01dc, 0x01dd,
+	0x01ea, 0x01ea, 0x01ee, 0x01f1, 0x01f5, 0x01f6, 0x01f8, 0x01f9,
+	0x01fc, 0x01ff,
+	0x0440, 0x0440, 0x0443, 0x0443, 0x0445, 0x0445, 0x044d, 0x044f,
+	0x0452, 0x0452, 0x0454, 0x046f, 0x047c, 0x047c, 0x047f, 0x047f,
+	0x0578, 0x057f, 0x0600, 0x0602, 0x0605, 0x0607, 0x060a, 0x060e,
+	0x0612, 0x0614, 0x0c01, 0x0c02, 0x0c06, 0x0c1d, 0x0c3d, 0x0c3f,
+	0x0c48, 0x0c4b, 0x0c80, 0x0c80, 0x0c88, 0x0c8b, 0x0ca0, 0x0cb7,
+	0x0cc0, 0x0cc1, 0x0cc6, 0x0cc7, 0x0ce4, 0x0ce5,
+	0x0e41, 0x0e45, 0x0e64, 0x0e65,
+	0x0e80, 0x0e82, 0x0e84, 0x0e89, 0x0ea0, 0x0ea1, 0x0ea4, 0x0ea7,
+	0x0ec4, 0x0ecb, 0x0ee0, 0x0ee0, 0x0f00, 0x0f01, 0x0f03, 0x0f09,
+	0x2040, 0x2040, 0x2044, 0x2044, 0x2048, 0x204d, 0x2068, 0x2069,
+	0x206c, 0x206d, 0x2070, 0x2070, 0x2072, 0x2072, 0x2074, 0x2075,
+	0x2079, 0x207a, 0x20c0, 0x20d3, 0x20e4, 0x20ef, 0x2100, 0x2109,
+	0x210c, 0x210c, 0x210e, 0x210e, 0x2110, 0x2111, 0x2114, 0x2115,
+	0x21e4, 0x21e4, 0x21ea, 0x21ea, 0x21ec, 0x21ed, 0x21f0, 0x21f0,
+	0x2240, 0x227e,
+	0x2280, 0x228b, 0x22c0, 0x22c0, 0x22c4, 0x22ce, 0x22d0, 0x22d8,
+	0x22df, 0x22e6, 0x22e8, 0x22e9, 0x22ec, 0x22ec, 0x22f0, 0x22f7,
+	0x22ff, 0x22ff, 0x2340, 0x2343,
+	0x2440, 0x2440, 0x2444, 0x2444, 0x2448, 0x244d,
+	0x2468, 0x2469, 0x246c, 0x246d, 0x2470, 0x2470, 0x2472, 0x2472,
+	0x2474, 0x2475, 0x2479, 0x247a, 0x24c0, 0x24d3, 0x24e4, 0x24ef,
+	0x2500, 0x2509, 0x250c, 0x250c, 0x250e, 0x250e, 0x2510, 0x2511,
+	0x2514, 0x2515, 0x25e4, 0x25e4, 0x25ea, 0x25ea, 0x25ec, 0x25ed,
+	0x25f0, 0x25f0,
+	0x2640, 0x267e, 0x2680, 0x268b, 0x26c0, 0x26c0, 0x26c4, 0x26ce,
+	0x26d0, 0x26d8, 0x26df, 0x26e6, 0x26e8, 0x26e9, 0x26ec, 0x26ec,
+	0x26f0, 0x26f7, 0x26ff, 0x26ff, 0x2740, 0x2743,
+	0x300C, 0x300E, 0x301C, 0x301D,
+	0x302A, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031, 0x3034, 0x3036,
+	0x303C, 0x303C, 0x305E, 0x305F,
+};
+
+/*
+ * HLSQ register ranges removed from the main a3xx_registers list: reading
+ * them may cause the device to hang, so they are only dumped after the
+ * HLSQ idle checks in _snapshot_hlsq_regs()
+ */
+static const unsigned int a3xx_hlsq_registers[] = {
+	0x0e00, 0x0e05, 0x0e0c, 0x0e0c, 0x0e22, 0x0e23,
+	0x2200, 0x2212, 0x2214, 0x2217, 0x221a, 0x221a,
+	0x2600, 0x2612, 0x2614, 0x2617, 0x261a, 0x261a,
+};
+
+/* The set of additional registers to be dumped for A330 */
+
+static const unsigned int a330_registers[] = {
+	0x1d0, 0x1d0, 0x1d4, 0x1d4, 0x453, 0x453,
+};
+
+/* Shader memory size in words */
+#define SHADER_MEMORY_SIZE 0x4000
+
+/**
+ * _rbbm_debug_bus_read - Helper function to read data from the RBBM
+ * debug bus.
+ * @device: GPU device to read/write registers
+ * @block_id: Debug bus block to read from
+ * @index: Index in the debug bus block to read
+ * @val: Pointer used to return the value read
+ */
+static void _rbbm_debug_bus_read(struct kgsl_device *device,
+	unsigned int block_id, unsigned int index, unsigned int *val)
+{
+	unsigned int block = (block_id << 8) | 1 << 16;
+
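+	/* Select the block and index, then read the captured value back */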
+	kgsl_regwrite(device, A3XX_RBBM_DEBUG_BUS_CTL, block | index);
+	kgsl_regread(device, A3XX_RBBM_DEBUG_BUS_DATA_STATUS, val);
+}
+
+/**
+ * a3xx_snapshot_shader_memory - Helper function to dump the GPU shader
+ * memory to the snapshot buffer.
+ * @device: GPU device whose shader memory is to be dumped
+ * @buf: Pointer to binary snapshot data blob being made
+ * @remain: Number of remaining bytes in the snapshot blob
+ * @priv: Unused parameter
+ *
+ */
+static size_t a3xx_snapshot_shader_memory(struct kgsl_device *device,
+	u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf;
+	unsigned int i;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	unsigned int shader_read_len = SHADER_MEMORY_SIZE;
+
+	if (shader_read_len > (device->shader_mem_len >> 2))
+		shader_read_len = (device->shader_mem_len >> 2);
+
+	if (remain < DEBUG_SECTION_SZ(shader_read_len)) {
+		SNAPSHOT_ERR_NOMEM(device, "SHADER MEMORY");
+		return 0;
+	}
+
+	header->type = SNAPSHOT_DEBUG_SHADER_MEMORY;
+	header->size = shader_read_len;
+
+	/* Map shader memory to kernel, for dumping */
+	if (device->shader_mem_virt == NULL)
+		device->shader_mem_virt = devm_ioremap(device->dev,
+					device->shader_mem_phys,
+					device->shader_mem_len);
+
+	if (device->shader_mem_virt == NULL) {
+		KGSL_DRV_ERR(device,
+		"Unable to map shader memory region\n");
+		return 0;
+	}
+
+	/* Now, dump shader memory to snapshot */
+	for (i = 0; i < shader_read_len; i++)
+		adreno_shadermem_regread(device, i, &data[i]);
+
+	return DEBUG_SECTION_SZ(shader_read_len);
+}
+
+static size_t a3xx_snapshot_debugbus_block(struct kgsl_device *device,
+	u8 *buf, size_t remain, void *priv)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	struct kgsl_snapshot_debugbus *header
+		= (struct kgsl_snapshot_debugbus *)buf;
+	struct adreno_debugbus_block *block = priv;
+	int i;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	unsigned int dwords;
+	size_t size;
+
+	/*
+	 * For A305 and A320 all debug bus regions are the same size (0x40). For
+	 * A330, they can be different sizes - most are still 0x40, but some
+	 * like CP are larger
+	 */
+
+	dwords = (adreno_is_a330(adreno_dev) ||
+		adreno_is_a305b(adreno_dev)) ?
+		block->dwords : 0x40;
+
+	size = (dwords * sizeof(unsigned int)) + sizeof(*header);
+
+	if (remain < size) {
+		SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS");
+		return 0;
+	}
+
+	header->id = block->block_id;
+	header->count = dwords;
+
+	for (i = 0; i < dwords; i++)
+		_rbbm_debug_bus_read(device, block->block_id, i, &data[i]);
+
+	return size;
+}
+
+static struct adreno_debugbus_block debugbus_blocks[] = {
+	{ RBBM_BLOCK_ID_CP, 0x52, },
+	{ RBBM_BLOCK_ID_RBBM, 0x40, },
+	{ RBBM_BLOCK_ID_VBIF, 0x40, },
+	{ RBBM_BLOCK_ID_HLSQ, 0x40, },
+	{ RBBM_BLOCK_ID_UCHE, 0x40, },
+	{ RBBM_BLOCK_ID_PC, 0x40, },
+	{ RBBM_BLOCK_ID_VFD, 0x40, },
+	{ RBBM_BLOCK_ID_VPC, 0x40, },
+	{ RBBM_BLOCK_ID_TSE, 0x40, },
+	{ RBBM_BLOCK_ID_RAS, 0x40, },
+	{ RBBM_BLOCK_ID_VSC, 0x40, },
+	{ RBBM_BLOCK_ID_SP_0, 0x40, },
+	{ RBBM_BLOCK_ID_SP_1, 0x40, },
+	{ RBBM_BLOCK_ID_SP_2, 0x40, },
+	{ RBBM_BLOCK_ID_SP_3, 0x40, },
+	{ RBBM_BLOCK_ID_TPL1_0, 0x40, },
+	{ RBBM_BLOCK_ID_TPL1_1, 0x40, },
+	{ RBBM_BLOCK_ID_TPL1_2, 0x40, },
+	{ RBBM_BLOCK_ID_TPL1_3, 0x40, },
+	{ RBBM_BLOCK_ID_RB_0, 0x40, },
+	{ RBBM_BLOCK_ID_RB_1, 0x40, },
+	{ RBBM_BLOCK_ID_RB_2, 0x40, },
+	{ RBBM_BLOCK_ID_RB_3, 0x40, },
+	{ RBBM_BLOCK_ID_MARB_0, 0x40, },
+	{ RBBM_BLOCK_ID_MARB_1, 0x40, },
+	{ RBBM_BLOCK_ID_MARB_2, 0x40, },
+	{ RBBM_BLOCK_ID_MARB_3, 0x40, },
+};
+
+static void a3xx_snapshot_debugbus(struct kgsl_device *device,
+		struct kgsl_snapshot *snapshot)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(debugbus_blocks); i++) {
+		kgsl_snapshot_add_section(device,
+			KGSL_SNAPSHOT_SECTION_DEBUGBUS, snapshot,
+			a3xx_snapshot_debugbus_block,
+			(void *) &debugbus_blocks[i]);
+	}
+}
+
+static void _snapshot_hlsq_regs(struct kgsl_device *device,
+		struct kgsl_snapshot *snapshot)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	/*
+	 * Trying to read HLSQ registers when the HLSQ block is busy
+	 * will cause the device to hang.  The RBBM_DEBUG_BUS has information
+	 * that will tell us if the HLSQ block is busy or not.  Read values
+	 * from the debug bus to ensure the HLSQ block is not busy (this
+	 * is hardware dependent).  If the HLSQ block is busy do not
+	 * dump the registers, otherwise dump the HLSQ registers.
+	 */
+
+	if (adreno_is_a330(adreno_dev)) {
+		/*
+		 * stall_ctxt_full status bit: RBBM_BLOCK_ID_HLSQ index 49 [27]
+		 *
+		 * if (!stall_context_full)
+		 * then dump HLSQ registers
+		 */
+		unsigned int stall_context_full = 0;
+
+		_rbbm_debug_bus_read(device, RBBM_BLOCK_ID_HLSQ, 49,
+				&stall_context_full);
+		stall_context_full &= 0x08000000;
+
+		if (stall_context_full)
+			return;
+	} else {
+		/*
+		 * tpif status bits: RBBM_BLOCK_ID_HLSQ index 4 [4:0]
+		 * spif status bits: RBBM_BLOCK_ID_HLSQ index 7 [5:0]
+		 *
+		 * if ((tpif == 0, 1, 28) && (spif == 0, 1, 10))
+		 * then dump HLSQ registers
+		 */
+		unsigned int next_pif = 0;
+
+		/* check tpif */
+		_rbbm_debug_bus_read(device, RBBM_BLOCK_ID_HLSQ, 4, &next_pif);
+		next_pif &= 0x1f;
+		if (next_pif != 0 && next_pif != 1 && next_pif != 28)
+			return;
+
+		/* check spif */
+		_rbbm_debug_bus_read(device, RBBM_BLOCK_ID_HLSQ, 7, &next_pif);
+		next_pif &= 0x3f;
+		if (next_pif != 0 && next_pif != 1 && next_pif != 10)
+			return;
+	}
+
+	SNAPSHOT_REGISTERS(device, snapshot, a3xx_hlsq_registers);
+}
+
+/*
+ * a3xx_snapshot() - A3XX GPU snapshot function
+ * @adreno_dev: Device being snapshotted
+ * @snapshot: Snapshot meta data
+ *
+ * This is where all of the A3XX specific bits and pieces are grabbed
+ * into the snapshot memory
+ */
+void a3xx_snapshot(struct adreno_device *adreno_dev,
+		struct kgsl_snapshot *snapshot)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	struct adreno_snapshot_data *snap_data = gpudev->snapshot_data;
+	unsigned int reg;
+
+	/* Disable Clock gating temporarily for the debug bus to work */
+	adreno_writereg(adreno_dev, ADRENO_REG_RBBM_CLOCK_CTL, 0x00);
+
+	SNAPSHOT_REGISTERS(device, snapshot, a3xx_registers);
+
+	_snapshot_hlsq_regs(device, snapshot);
+
+	if (adreno_is_a330(adreno_dev) || adreno_is_a305b(adreno_dev))
+		SNAPSHOT_REGISTERS(device, snapshot, a330_registers);
+
+	kgsl_snapshot_indexed_registers(device, snapshot,
+		A3XX_CP_STATE_DEBUG_INDEX, A3XX_CP_STATE_DEBUG_DATA,
+		0x0, snap_data->sect_sizes->cp_pfp);
+
+	/* CP_ME indexed registers */
+	kgsl_snapshot_indexed_registers(device, snapshot,
+		A3XX_CP_ME_CNTL, A3XX_CP_ME_STATUS, 64, 44);
+
+	/* VPC memory */
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
+		snapshot, adreno_snapshot_vpc_memory,
+		&snap_data->sect_sizes->vpc_mem);
+
+	/* CP MEQ */
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG, snapshot,
+		adreno_snapshot_cp_meq, &snap_data->sect_sizes->cp_meq);
+
+	/* Shader working/shadow memory */
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
+		snapshot, a3xx_snapshot_shader_memory,
+		&snap_data->sect_sizes->shader_mem);
+
+	/* CP PFP and PM4 */
+
+	/*
+	 * Reading the microcode while the CP is running will
+	 * basically move the CP instruction pointer to
+	 * whatever address we read. Big badaboom ensues. Stop the CP
+	 * (if it isn't already stopped) to ensure that we are safe.
+	 * We do this here and not earlier to avoid corrupting the RBBM
+	 * status and CP registers - by the time we get here we don't
+	 * care about the contents of the CP anymore.
+	 */
+
+	adreno_readreg(adreno_dev, ADRENO_REG_CP_ME_CNTL, &reg);
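+	/* Bits 27 and 28 are assumed to be the PFP and ME halt controls */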
+	reg |= (1 << 27) | (1 << 28);
+	adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_CNTL, reg);
+
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
+		snapshot, adreno_snapshot_cp_pfp_ram, NULL);
+
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
+		snapshot, adreno_snapshot_cp_pm4_ram, NULL);
+
+	/* CP ROQ */
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
+		snapshot, adreno_snapshot_cp_roq, &snap_data->sect_sizes->roq);
+
+	if (snap_data->sect_sizes->cp_merciu) {
+		kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
+			snapshot, adreno_snapshot_cp_merciu,
+			&snap_data->sect_sizes->cp_merciu);
+	}
+
+	a3xx_snapshot_debugbus(device, snapshot);
+}
diff --git a/drivers/gpu/msm/adreno_a4xx.c b/drivers/gpu/msm/adreno_a4xx.c
new file mode 100644
index 0000000..c807b67
--- /dev/null
+++ b/drivers/gpu/msm/adreno_a4xx.c
@@ -0,0 +1,1799 @@
+/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/msm_kgsl.h>
+
+#include "adreno.h"
+#include "kgsl_sharedmem.h"
+#include "a4xx_reg.h"
+#include "adreno_a3xx.h"
+#include "adreno_a4xx.h"
+#include "adreno_cp_parser.h"
+#include "adreno_trace.h"
+#include "adreno_pm4types.h"
+#include "adreno_perfcounter.h"
+
+#define SP_TP_PWR_ON BIT(20)
+/* A4XX_RBBM_CLOCK_CTL_IP */
+#define CNTL_IP_SW_COLLAPSE		BIT(0)
+
+/*
+ * Define registers for a4xx that contain addresses used by the
+ * cp parser logic
+ */
+const unsigned int a4xx_cp_addr_regs[ADRENO_CP_ADDR_MAX] = {
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_0,
+				A4XX_VSC_PIPE_DATA_ADDRESS_0),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_0,
+				A4XX_VSC_PIPE_DATA_LENGTH_0),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_1,
+				A4XX_VSC_PIPE_DATA_ADDRESS_1),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_1,
+				A4XX_VSC_PIPE_DATA_LENGTH_1),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_2,
+				A4XX_VSC_PIPE_DATA_ADDRESS_2),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_2,
+				A4XX_VSC_PIPE_DATA_LENGTH_2),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_3,
+				A4XX_VSC_PIPE_DATA_ADDRESS_3),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_3,
+				A4XX_VSC_PIPE_DATA_LENGTH_3),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_4,
+				A4XX_VSC_PIPE_DATA_ADDRESS_4),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_4,
+				A4XX_VSC_PIPE_DATA_LENGTH_4),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_5,
+				A4XX_VSC_PIPE_DATA_ADDRESS_5),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_5,
+				A4XX_VSC_PIPE_DATA_LENGTH_5),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_6,
+				A4XX_VSC_PIPE_DATA_ADDRESS_6),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_6,
+				A4XX_VSC_PIPE_DATA_LENGTH_6),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_7,
+				A4XX_VSC_PIPE_DATA_ADDRESS_7),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_7,
+				A4XX_VSC_PIPE_DATA_LENGTH_7),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_0,
+				A4XX_VFD_FETCH_INSTR_1_0),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_1,
+				A4XX_VFD_FETCH_INSTR_1_1),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_2,
+				A4XX_VFD_FETCH_INSTR_1_2),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_3,
+				A4XX_VFD_FETCH_INSTR_1_3),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_4,
+				A4XX_VFD_FETCH_INSTR_1_4),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_5,
+				A4XX_VFD_FETCH_INSTR_1_5),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_6,
+				A4XX_VFD_FETCH_INSTR_1_6),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_7,
+				A4XX_VFD_FETCH_INSTR_1_7),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_8,
+				A4XX_VFD_FETCH_INSTR_1_8),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_9,
+				A4XX_VFD_FETCH_INSTR_1_9),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_10,
+				A4XX_VFD_FETCH_INSTR_1_10),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_11,
+				A4XX_VFD_FETCH_INSTR_1_11),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_12,
+				A4XX_VFD_FETCH_INSTR_1_12),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_13,
+				A4XX_VFD_FETCH_INSTR_1_13),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_14,
+				A4XX_VFD_FETCH_INSTR_1_14),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_15,
+				A4XX_VFD_FETCH_INSTR_1_15),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_16,
+				A4XX_VFD_FETCH_INSTR_1_16),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_17,
+				A4XX_VFD_FETCH_INSTR_1_17),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_18,
+				A4XX_VFD_FETCH_INSTR_1_18),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_19,
+				A4XX_VFD_FETCH_INSTR_1_19),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_20,
+				A4XX_VFD_FETCH_INSTR_1_20),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_21,
+				A4XX_VFD_FETCH_INSTR_1_21),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_22,
+				A4XX_VFD_FETCH_INSTR_1_22),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_23,
+				A4XX_VFD_FETCH_INSTR_1_23),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_24,
+				A4XX_VFD_FETCH_INSTR_1_24),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_25,
+				A4XX_VFD_FETCH_INSTR_1_25),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_26,
+				A4XX_VFD_FETCH_INSTR_1_26),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_27,
+				A4XX_VFD_FETCH_INSTR_1_27),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_28,
+				A4XX_VFD_FETCH_INSTR_1_28),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_29,
+				A4XX_VFD_FETCH_INSTR_1_29),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_30,
+				A4XX_VFD_FETCH_INSTR_1_30),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_31,
+				A4XX_VFD_FETCH_INSTR_1_31),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_VSC_SIZE_ADDRESS,
+				A4XX_VSC_SIZE_ADDRESS),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_SP_VS_PVT_MEM_ADDR,
+				A4XX_SP_VS_PVT_MEM_ADDR),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_SP_FS_PVT_MEM_ADDR,
+				A4XX_SP_FS_PVT_MEM_ADDR),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_SP_VS_OBJ_START_REG,
+				A4XX_SP_VS_OBJ_START),
+	ADRENO_REG_DEFINE(ADRENO_CP_ADDR_SP_FS_OBJ_START_REG,
+				A4XX_SP_FS_OBJ_START),
+	ADRENO_REG_DEFINE(ADRENO_CP_UCHE_INVALIDATE0,
+				A4XX_UCHE_INVALIDATE0),
+	ADRENO_REG_DEFINE(ADRENO_CP_UCHE_INVALIDATE1,
+				A4XX_UCHE_INVALIDATE1),
+};
+
+static const struct adreno_vbif_data a405_vbif[] = {
+	{ A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003 },
+	{0, 0},
+};
+
+static const struct adreno_vbif_data a420_vbif[] = {
+	{ A4XX_VBIF_ABIT_SORT, 0x0001001F },
+	{ A4XX_VBIF_ABIT_SORT_CONF, 0x000000A4 },
+	{ A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001 },
+	{ A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818 },
+	{ A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018 },
+	{ A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818 },
+	{ A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018 },
+	{ A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003 },
+	{0, 0},
+};
+
+static const struct adreno_vbif_data a430_vbif[] = {
+	{ A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001 },
+	{ A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818 },
+	{ A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018 },
+	{ A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818 },
+	{ A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018 },
+	{ A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003 },
+	{0, 0},
+};
+
+static const struct adreno_vbif_platform a4xx_vbif_platforms[] = {
+	{ adreno_is_a405, a405_vbif },
+	{ adreno_is_a420, a420_vbif },
+	{ adreno_is_a430, a430_vbif },
+	{ adreno_is_a418, a430_vbif },
+};
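+
+/*
+ * adreno_vbif_start() presumably walks this table, picks the first entry
+ * whose match function returns true and writes the associated
+ * register/value pairs until the {0, 0} terminator.
+ */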
+
+/*
+ * a4xx_is_sptp_idle() - A430 SP/TP should be off to be considered idle
+ * @adreno_dev: The adreno device pointer
+ */
+static bool a4xx_is_sptp_idle(struct adreno_device *adreno_dev)
+{
+	unsigned int reg;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_SPTP_PC))
+		return true;
+
+	/* If SP/TP pc isn't enabled, don't worry about power */
+	kgsl_regread(device, A4XX_CP_POWER_COLLAPSE_CNTL, &reg);
+	if (!(reg & 0x10))
+		return true;
+
+	/* Check that SP/TP is off */
+	kgsl_regread(device, A4XX_RBBM_POWER_STATUS, &reg);
+	return !(reg & SP_TP_PWR_ON);
+}
+
+/*
+ * a4xx_enable_hwcg() - Program the clock control registers
+ * @device: The adreno device pointer
+ */
+static void a4xx_enable_hwcg(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_TP0, 0x02222202);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_TP1, 0x02222202);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_TP2, 0x02222202);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_TP3, 0x02222202);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_TP0, 0x00002222);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_TP1, 0x00002222);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_TP2, 0x00002222);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_TP3, 0x00002222);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_HYST_TP0, 0x0E739CE7);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_HYST_TP1, 0x0E739CE7);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_HYST_TP2, 0x0E739CE7);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_HYST_TP3, 0x0E739CE7);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_TP0, 0x00111111);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_TP1, 0x00111111);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_TP2, 0x00111111);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_TP3, 0x00111111);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_SP0, 0x22222222);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_SP1, 0x22222222);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_SP2, 0x22222222);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_SP3, 0x22222222);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_SP0, 0x00222222);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_SP1, 0x00222222);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_SP2, 0x00222222);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_SP3, 0x00222222);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_HYST_SP0, 0x00000104);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_HYST_SP1, 0x00000104);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_HYST_SP2, 0x00000104);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_HYST_SP3, 0x00000104);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_SP0, 0x00000081);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_SP1, 0x00000081);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_SP2, 0x00000081);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_SP3, 0x00000081);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_UCHE, 0x22222222);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_UCHE, 0x02222222);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL3_UCHE, 0x00000000);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL4_UCHE, 0x00000000);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_HYST_UCHE, 0x00004444);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_UCHE, 0x00001112);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_RB0, 0x22222222);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_RB1, 0x22222222);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_RB2, 0x22222222);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_RB3, 0x22222222);
+	/* Disable L1 clocking in A420 due to CCU issues with it */
+	if (adreno_is_a420(adreno_dev)) {
+		kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_RB0, 0x00002020);
+		kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_RB1, 0x00002020);
+		kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_RB2, 0x00002020);
+		kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_RB3, 0x00002020);
+	} else {
+		kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_RB0, 0x00022020);
+		kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_RB1, 0x00022020);
+		kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_RB2, 0x00022020);
+		kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_RB3, 0x00022020);
+	}
+	/* No CCU for A405 */
+	if (!adreno_is_a405(adreno_dev)) {
+		kgsl_regwrite(device,
+			A4XX_RBBM_CLOCK_CTL_MARB_CCU0, 0x00000922);
+		kgsl_regwrite(device,
+			A4XX_RBBM_CLOCK_CTL_MARB_CCU1, 0x00000922);
+		kgsl_regwrite(device,
+			A4XX_RBBM_CLOCK_CTL_MARB_CCU2, 0x00000922);
+		kgsl_regwrite(device,
+			A4XX_RBBM_CLOCK_CTL_MARB_CCU3, 0x00000922);
+		kgsl_regwrite(device,
+			A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU0, 0x00000000);
+		kgsl_regwrite(device,
+			A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU1, 0x00000000);
+		kgsl_regwrite(device,
+			A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU2, 0x00000000);
+		kgsl_regwrite(device,
+			A4XX_RBBM_CLOCK_HYST_RB_MARB_CCU3, 0x00000000);
+		kgsl_regwrite(device,
+				A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_0,
+				0x00000001);
+		kgsl_regwrite(device,
+				A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_1,
+				0x00000001);
+		kgsl_regwrite(device,
+				A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_2,
+				0x00000001);
+		kgsl_regwrite(device,
+				A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_3,
+				0x00000001);
+	}
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_MODE_GPC, 0x02222222);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_HYST_GPC, 0x04100104);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_GPC, 0x00022222);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_COM_DCOM, 0x00000022);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_HYST_COM_DCOM, 0x0000010F);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_COM_DCOM, 0x00000022);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_TSE_RAS_RBBM, 0x00222222);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00004104);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00000222);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_HLSQ, 0x00000000);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_HLSQ, 0x00220000);
+	/*
+	 * Due to a HW timing issue, top level HW clock gating is causing
+	 * register read/writes to be dropped in adreno a430.
+	 * This timing issue started happening because of SP/TP power collapse.
+	 * On targets that do not have SP/TP PC there is no timing issue.
+	 * The HW timing issue could be fixed by
+	 * a) disabling SP/TP power collapse
+	 * b) or disabling HW clock gating.
+	 * Disabling HW clock gating while NAP is enabled has minimal power
+	 * impact, so that option is chosen over disabling SP/TP power
+	 * collapse.
+	 * A430 revisions with chipid patch level 2 and above do not have
+	 * the issue.
+	 */
+	if (adreno_is_a430(adreno_dev) &&
+		(ADRENO_CHIPID_PATCH(adreno_dev->chipid) < 2))
+		kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL, 0);
+	else
+		kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL, 0xAAAAAAAA);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2, 0);
+}
+
+/*
+ * a4xx_regulator_enable() - Enable any necessary HW regulators
+ * @adreno_dev: The adreno device pointer
+ *
+ * Some HW blocks may need their regulators explicitly enabled
+ * on a restart.  Clocks must be on during this call.
+ */
+static int a4xx_regulator_enable(struct adreno_device *adreno_dev)
+{
+	unsigned int reg;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (!(adreno_is_a430(adreno_dev) || adreno_is_a418(adreno_dev))) {
+		/* Halt the sp_input_clk at HM level */
+		kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL, 0x00000055);
+		a4xx_enable_hwcg(device);
+		return 0;
+	}
+
+	/* Set the default register values; set SW_COLLAPSE to 0 */
+	kgsl_regwrite(device, A4XX_RBBM_POWER_CNTL_IP, 0x778000);
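+	/* Poll until the SP/TP power rail reports that it is on */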
+	do {
+		udelay(5);
+		kgsl_regread(device, A4XX_RBBM_POWER_STATUS, &reg);
+	} while (!(reg & SP_TP_PWR_ON));
+
+	/* Disable SP clock */
+	kgsl_regrmw(device, A4XX_RBBM_CLOCK_CTL_IP, CNTL_IP_SW_COLLAPSE, 0);
+	/* Enable hardware clockgating */
+	a4xx_enable_hwcg(device);
+	/* Enable SP clock */
+	kgsl_regrmw(device, A4XX_RBBM_CLOCK_CTL_IP, CNTL_IP_SW_COLLAPSE, 1);
+	return 0;
+}
+
+/*
+ * a4xx_regulator_disable() - Disable any necessary HW regulators
+ * @adreno_dev: The adreno device pointer
+ *
+ * Some HW blocks may need their regulators explicitly disabled
+ * on a power down to prevent current spikes.  Clocks must be on
+ * during this call.
+ */
+static void a4xx_regulator_disable(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (!(adreno_is_a430(adreno_dev) || adreno_is_a418(adreno_dev)))
+		return;
+
+	/* Set the default register values; set SW_COLLAPSE to 1 */
+	kgsl_regwrite(device, A4XX_RBBM_POWER_CNTL_IP, 0x778001);
+}
+
+/*
+ * a4xx_enable_pc() - Enable the SP/TP block power collapse
+ * @adreno_dev: The adreno device pointer
+ */
+static void a4xx_enable_pc(struct adreno_device *adreno_dev)
+{
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_SPTP_PC) ||
+		!test_bit(ADRENO_SPTP_PC_CTRL, &adreno_dev->pwrctrl_flag))
+		return;
+
+	kgsl_regwrite(KGSL_DEVICE(adreno_dev), A4XX_CP_POWER_COLLAPSE_CNTL,
+		0x00400010);
+	trace_adreno_sp_tp((unsigned long) __builtin_return_address(0));
+}
+
+/*
+ * a4xx_enable_ppd() - Enable the Peak power detect logic in the h/w
+ * @adreno_dev: The adreno device pointer
+ *
+ * A430 can detect peak current conditions inside h/w and throttle
+ * the workload to ALUs to mitigate it.
+ */
+static void a4xx_enable_ppd(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_PPD) ||
+		!test_bit(ADRENO_PPD_CTRL, &adreno_dev->pwrctrl_flag) ||
+		!adreno_is_a430v2(adreno_dev))
+		return;
+
+	/* Program thresholds */
+	kgsl_regwrite(device, A4XX_RBBM_PPD_EPOCH_INTER_TH_HIGH_CLEAR_THR,
+								0x003F0101);
+	kgsl_regwrite(device, A4XX_RBBM_PPD_EPOCH_INTER_TH_LOW, 0x00000101);
+	kgsl_regwrite(device, A4XX_RBBM_PPD_V2_SP_PWR_WEIGHTS, 0x00085014);
+	kgsl_regwrite(device, A4XX_RBBM_PPD_V2_SP_RB_EPOCH_TH, 0x00000B46);
+	kgsl_regwrite(device, A4XX_RBBM_PPD_V2_TP_CONFIG, 0xE4525111);
+	kgsl_regwrite(device, A4XX_RBBM_PPD_RAMP_V2_CONTROL, 0x0000000B);
+
+	/* Enable PPD*/
+	kgsl_regwrite(device, A4XX_RBBM_PPD_CTRL, 0x1002E40C);
+};
+
+/*
+ * a4xx_pwrlevel_change_settings() - Program the hardware during power level
+ * transitions
+ * @adreno_dev: The adreno device pointer
+ * @prelevel: The previous power level
+ * @postlevel: The new power level
+ * @post: True if called after the clock change has taken effect
+ */
+static void a4xx_pwrlevel_change_settings(struct adreno_device *adreno_dev,
+				unsigned int prelevel, unsigned int postlevel,
+				bool post)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	static int pre;
+
+	/* PPD programming only for A430v2 */
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_PPD) ||
+		!test_bit(ADRENO_PPD_CTRL, &adreno_dev->pwrctrl_flag) ||
+		!adreno_is_a430v2(adreno_dev))
+		return;
+
+	/* if this is a real pre, or a post without a previous pre, set pre */
+	if ((post == 0) || (pre == 0 && post == 1))
+		pre = 1;
+	else if (post == 1)
+		pre = 0;
+
+	if ((prelevel == 0) && pre) {
+		/* Going to Non-Turbo mode - mask the throttle and reset */
+		kgsl_regwrite(device, A4XX_RBBM_PPD_CTRL, 0x1002E40E);
+		kgsl_regwrite(device, A4XX_RBBM_PPD_CTRL, 0x1002E40C);
+	} else if ((postlevel == 0) && post) {
+		/* Going to Turbo mode - unmask the throttle and reset */
+		kgsl_regwrite(device, A4XX_RBBM_PPD_CTRL, 0x1002E40A);
+		kgsl_regwrite(device, A4XX_RBBM_PPD_CTRL, 0x1002E408);
+	}
+
+	if (post)
+		pre = 0;
+}
+
+/**
+ * a4xx_protect_init() - Initializes register protection on a4xx
+ * @adreno_dev: Pointer to the device structure
+ * Performs register writes to enable protected access to sensitive
+ * registers
+ */
+static void a4xx_protect_init(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int index = 0;
+	struct kgsl_protected_registers *iommu_regs;
+
+	/* enable access protection to privileged registers */
+	kgsl_regwrite(device, A4XX_CP_PROTECT_CTRL, 0x00000007);
+	/* RBBM registers */
+	adreno_set_protected_registers(adreno_dev, &index, 0x4, 2);
+	adreno_set_protected_registers(adreno_dev, &index, 0x8, 3);
+	adreno_set_protected_registers(adreno_dev, &index, 0x10, 4);
+	adreno_set_protected_registers(adreno_dev, &index, 0x20, 5);
+	adreno_set_protected_registers(adreno_dev, &index, 0x40, 6);
+	adreno_set_protected_registers(adreno_dev, &index, 0x80, 4);
+
+	/* Content protection registers */
+	if (kgsl_mmu_is_secured(&device->mmu)) {
+		adreno_set_protected_registers(adreno_dev, &index,
+			   A4XX_RBBM_SECVID_TSB_TRUSTED_BASE, 3);
+		adreno_set_protected_registers(adreno_dev, &index,
+			   A4XX_RBBM_SECVID_TRUST_CONTROL, 1);
+	}
+
+	/* CP registers */
+	adreno_set_protected_registers(adreno_dev, &index, 0x200, 7);
+	adreno_set_protected_registers(adreno_dev, &index, 0x580, 4);
+	adreno_set_protected_registers(adreno_dev, &index, A4XX_CP_PREEMPT, 1);
+	/* RB registers */
+	adreno_set_protected_registers(adreno_dev, &index, 0xCC0, 0);
+
+	/* HLSQ registers */
+	adreno_set_protected_registers(adreno_dev, &index, 0xE00, 0);
+
+	/* VPC registers */
+	adreno_set_protected_registers(adreno_dev, &index, 0xE60, 1);
+
+	if (adreno_is_a430(adreno_dev) || adreno_is_a420(adreno_dev) ||
+			adreno_is_a418(adreno_dev)) {
+		/*
+		 * Protect registers that might cause XPU violation if
+		 * accessed by GPU
+		 */
+		adreno_set_protected_registers(adreno_dev, &index, 0x2c00, 10);
+		adreno_set_protected_registers(adreno_dev, &index, 0x3300, 8);
+		adreno_set_protected_registers(adreno_dev, &index, 0x3400, 10);
+	}
+
+	/* SMMU registers */
+	iommu_regs = kgsl_mmu_get_prot_regs(&device->mmu);
+	if (iommu_regs)
+		adreno_set_protected_registers(adreno_dev, &index,
+				iommu_regs->base, iommu_regs->range);
+}
+
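+/*
+ * Default snapshot section sizes.  cp_pfp is bumped in a4xx_start() for
+ * A430 and derivatives, which expose a larger CP_STATE_DEBUG region.
+ */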
+static struct adreno_snapshot_sizes a4xx_snap_sizes = {
+	.cp_pfp = 0x14,
+	.vpc_mem = 2048,
+	.cp_meq = 64,
+	.shader_mem = 0x4000,
+	.cp_merciu = 64,
+	.roq = 512,
+};
+
+static void a4xx_start(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	unsigned int cp_debug = A4XX_CP_DEBUG_DEFAULT;
+
+	adreno_vbif_start(adreno_dev, a4xx_vbif_platforms,
+			ARRAY_SIZE(a4xx_vbif_platforms));
+	/* Make all blocks contribute to the GPU BUSY perf counter */
+	kgsl_regwrite(device, A4XX_RBBM_GPU_BUSY_MASKED, 0xFFFFFFFF);
+
+	/* Tune the hysteresis counters for SP and CP idle detection */
+	kgsl_regwrite(device, A4XX_RBBM_SP_HYST_CNT, 0x10);
+	kgsl_regwrite(device, A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);
+	if (adreno_is_a430(adreno_dev))
+		kgsl_regwrite(device, A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2, 0x30);
+
+	/*
+	 * Enable the RBBM error reporting bits.  This lets us get
+	 * useful information on failure
+	 */
+
+	kgsl_regwrite(device, A4XX_RBBM_AHB_CTL0, 0x00000001);
+
+	/* Enable AHB error reporting */
+	kgsl_regwrite(device, A4XX_RBBM_AHB_CTL1, 0xA6FFFFFF);
+
+	/* Turn on the power counters */
+	kgsl_regwrite(device, A4XX_RBBM_RBBM_CTL, 0x00000030);
+
+	/*
+	 * Turn on hang detection - this spews a lot of useful information
+	 * into the RBBM registers on a hang
+	 */
+	set_bit(ADRENO_DEVICE_HANG_INTR, &adreno_dev->priv);
+	gpudev->irq->mask |= (1 << A4XX_INT_MISC_HANG_DETECT);
+	kgsl_regwrite(device, A4XX_RBBM_INTERFACE_HANG_INT_CTL,
+			(1 << 30) | 0xFFFF);
+
+	/* Set the GMEM/OCMEM base address for A4XX */
+	kgsl_regwrite(device, A4XX_RB_GMEM_BASE_ADDR,
+			(unsigned int)(adreno_dev->gmem_base >> 14));
+
+	/* Turn on performance counters */
+	kgsl_regwrite(device, A4XX_RBBM_PERFCTR_CTL, 0x01);
+
+	/* Enable VFD to access most of the UCHE (7 ways out of 8) */
+	kgsl_regwrite(device, A4XX_UCHE_CACHE_WAYS_VFD, 0x07);
+
+	/* Disable L2 bypass to avoid UCHE out of bounds errors */
+	kgsl_regwrite(device, UCHE_TRAP_BASE_LO, 0xffff0000);
+	kgsl_regwrite(device, UCHE_TRAP_BASE_HI, 0xffff0000);
+
+	/* On A420 cores turn on SKIP_IB2_DISABLE in addition to the default */
+	if (adreno_is_a420(adreno_dev))
+		cp_debug |= (1 << 29);
+	/*
+	 * Set chicken bit to disable the speed up of bootstrap on A430
+	 * and its derivatives
+	 */
+	else
+		cp_debug |= (1 << 14);
+
+	kgsl_regwrite(device, A4XX_CP_DEBUG, cp_debug);
+
+	/* On A430 enable SP regfile sleep for power savings */
+	if (!adreno_is_a420(adreno_dev)) {
+		kgsl_regwrite(device, A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0,
+				0x00000441);
+		kgsl_regwrite(device, A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1,
+				0x00000441);
+	}
+
+	/*
+	 * For A420 set RBBM_CLOCK_DELAY_HLSQ.CGC_HLSQ_TP_EARLY_CYC >= 2
+	 * due to a timing issue with HLSQ_TP_CLK_EN
+	 */
+	if (adreno_is_a420(adreno_dev)) {
+		unsigned int val;
+
+		kgsl_regread(device, A4XX_RBBM_CLOCK_DELAY_HLSQ, &val);
+		val &= ~A4XX_CGC_HLSQ_TP_EARLY_CYC_MASK;
+		val |= 2 << A4XX_CGC_HLSQ_TP_EARLY_CYC_SHIFT;
+		kgsl_regwrite(device, A4XX_RBBM_CLOCK_DELAY_HLSQ, val);
+	}
+
+	/* A430 and derivatives offer a bigger chunk of CP_STATE_DEBUG regs */
+	if (!adreno_is_a420(adreno_dev))
+		a4xx_snap_sizes.cp_pfp = 0x34;
+
+	if (adreno_is_a405(adreno_dev))
+		gpudev->vbif_xin_halt_ctrl0_mask =
+			A405_VBIF_XIN_HALT_CTRL0_MASK;
+
+	adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
+
+	a4xx_protect_init(adreno_dev);
+}
+
+/*
+ * a4xx_err_callback() - Callback for a4xx error interrupts
+ * @adreno_dev: Pointer to device
+ * @bit: Interrupt bit
+ */
+static void a4xx_err_callback(struct adreno_device *adreno_dev, int bit)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int reg;
+
+	switch (bit) {
+	case A4XX_INT_RBBM_AHB_ERROR: {
+		kgsl_regread(device, A4XX_RBBM_AHB_ERROR_STATUS, &reg);
+
+		/*
+		 * Return the word address of the erroring register so that it
+		 * matches the register specification
+		 */
+		KGSL_DRV_CRIT_RATELIMIT(device,
+			"RBBM | AHB bus error | %s | addr=%x | ports=%x:%x\n",
+			reg & (1 << 28) ? "WRITE" : "READ",
+			(reg & 0xFFFFF) >> 2, (reg >> 20) & 0x3,
+			(reg >> 24) & 0xF);
+
+		/* Clear the error */
+		kgsl_regwrite(device, A4XX_RBBM_AHB_CMD, (1 << 4));
+		break;
+	}
+	case A4XX_INT_RBBM_REG_TIMEOUT:
+		KGSL_DRV_CRIT_RATELIMIT(device, "RBBM: AHB register timeout\n");
+		break;
+	case A4XX_INT_RBBM_ME_MS_TIMEOUT:
+		kgsl_regread(device, A4XX_RBBM_AHB_ME_SPLIT_STATUS, &reg);
+		KGSL_DRV_CRIT_RATELIMIT(device,
+			"RBBM | ME master split timeout | status=%x\n", reg);
+		break;
+	case A4XX_INT_RBBM_PFP_MS_TIMEOUT:
+		kgsl_regread(device, A4XX_RBBM_AHB_PFP_SPLIT_STATUS, &reg);
+		KGSL_DRV_CRIT_RATELIMIT(device,
+			"RBBM | PFP master split timeout | status=%x\n", reg);
+		break;
+	case A4XX_INT_RBBM_ETS_MS_TIMEOUT:
+		KGSL_DRV_CRIT_RATELIMIT(device,
+			"RBBM: ETS master split timeout\n");
+		break;
+	case A4XX_INT_RBBM_ASYNC_OVERFLOW:
+		KGSL_DRV_CRIT_RATELIMIT(device, "RBBM: ASYNC overflow\n");
+		break;
+	case A4XX_INT_CP_OPCODE_ERROR:
+		KGSL_DRV_CRIT_RATELIMIT(device,
+			"ringbuffer opcode error interrupt\n");
+		break;
+	case A4XX_INT_CP_RESERVED_BIT_ERROR:
+		KGSL_DRV_CRIT_RATELIMIT(device,
+			"ringbuffer reserved bit error interrupt\n");
+		break;
+	case A4XX_INT_CP_HW_FAULT:
+	{
+		struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+
+		kgsl_regread(device, A4XX_CP_HW_FAULT, &reg);
+		KGSL_DRV_CRIT_RATELIMIT(device,
+			"CP | Ringbuffer HW fault | status=%x\n", reg);
+		/*
+		 * mask off this interrupt since it can spam, it will be
+		 * turned on again when device resets
+		 */
+		adreno_writereg(adreno_dev, ADRENO_REG_RBBM_INT_0_MASK,
+			gpudev->irq->mask & ~(1 << A4XX_INT_CP_HW_FAULT));
+		break;
+	}
+	case A4XX_INT_CP_REG_PROTECT_FAULT:
+		kgsl_regread(device, A4XX_CP_PROTECT_STATUS, &reg);
+		KGSL_DRV_CRIT_RATELIMIT(device,
+			"CP | Protected mode error| %s | addr=%x\n",
+			reg & (1 << 24) ? "WRITE" : "READ",
+			(reg & 0xFFFFF) >> 2);
+		break;
+	case A4XX_INT_CP_AHB_ERROR_HALT:
+		KGSL_DRV_CRIT_RATELIMIT(device,
+			"ringbuffer AHB error interrupt\n");
+		break;
+	case A4XX_INT_RBBM_ATB_BUS_OVERFLOW:
+		KGSL_DRV_CRIT_RATELIMIT(device, "RBBM: ATB bus overflow\n");
+		break;
+	case A4XX_INT_UCHE_OOB_ACCESS:
+		KGSL_DRV_CRIT_RATELIMIT(device, "UCHE: Out of bounds access\n");
+		break;
+	case A4XX_INT_RBBM_DPM_CALC_ERR:
+		KGSL_DRV_CRIT_RATELIMIT(device, "RBBM: dpm calc error\n");
+		break;
+	case A4XX_INT_RBBM_DPM_EPOCH_ERR:
+		KGSL_DRV_CRIT_RATELIMIT(device, "RBBM: dpm epoch error\n");
+		break;
+	case A4XX_INT_RBBM_DPM_THERMAL_YELLOW_ERR:
+		KGSL_DRV_CRIT_RATELIMIT(device, "RBBM: dpm thermal yellow\n");
+		break;
+	case A4XX_INT_RBBM_DPM_THERMAL_RED_ERR:
+		KGSL_DRV_CRIT_RATELIMIT(device, "RBBM: dpm thermal red\n");
+		break;
+	default:
+		KGSL_DRV_CRIT_RATELIMIT(device, "Unknown interrupt\n");
+	}
+}
+
+static unsigned int a4xx_int_bits[ADRENO_INT_BITS_MAX] = {
+	ADRENO_INT_DEFINE(ADRENO_INT_RBBM_AHB_ERROR, A4XX_INT_RBBM_AHB_ERROR),
+};
+
+/* Register offset defines for A4XX, in order of enum adreno_regs */
+static unsigned int a4xx_register_offsets[ADRENO_REG_REGISTER_MAX] = {
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_RAM_WADDR, A4XX_CP_ME_RAM_WADDR),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_RAM_DATA, A4XX_CP_ME_RAM_DATA),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_PFP_UCODE_DATA, A4XX_CP_PFP_UCODE_DATA),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_PFP_UCODE_ADDR, A4XX_CP_PFP_UCODE_ADDR),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_WFI_PEND_CTR, A4XX_CP_WFI_PEND_CTR),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE, A4XX_CP_RB_BASE),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE_HI, ADRENO_REG_SKIP),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR_ADDR_LO, A4XX_CP_RB_RPTR_ADDR),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR, A4XX_CP_RB_RPTR),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_WPTR, A4XX_CP_RB_WPTR),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_CNTL, A4XX_CP_CNTL),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_CNTL, A4XX_CP_ME_CNTL),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_CNTL, A4XX_CP_RB_CNTL),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE, A4XX_CP_IB1_BASE),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE_HI, ADRENO_REG_SKIP),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BUFSZ, A4XX_CP_IB1_BUFSZ),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE, A4XX_CP_IB2_BASE),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE_HI, ADRENO_REG_SKIP),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BUFSZ, A4XX_CP_IB2_BUFSZ),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_RAM_RADDR, A4XX_CP_ME_RAM_RADDR),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_ROQ_ADDR, A4XX_CP_ROQ_ADDR),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_ROQ_DATA, A4XX_CP_ROQ_DATA),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_MERCIU_ADDR, A4XX_CP_MERCIU_ADDR),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_MERCIU_DATA, A4XX_CP_MERCIU_DATA),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_MERCIU_DATA2, A4XX_CP_MERCIU_DATA2),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_MEQ_ADDR, A4XX_CP_MEQ_ADDR),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_MEQ_DATA, A4XX_CP_MEQ_DATA),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_HW_FAULT, A4XX_CP_HW_FAULT),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_PROTECT_STATUS, A4XX_CP_PROTECT_STATUS),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_SCRATCH_REG6, A4XX_CP_SCRATCH_REG6),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_SCRATCH_REG7, A4XX_CP_SCRATCH_REG7),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_PREEMPT, A4XX_CP_PREEMPT),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_PREEMPT_DEBUG, A4XX_CP_PREEMPT_DEBUG),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_PREEMPT_DISABLE,
+						A4XX_CP_PREEMPT_DISABLE),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_PROTECT_REG_0, A4XX_CP_PROTECT_REG_0),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_STATUS, A4XX_RBBM_STATUS),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_CTL, A4XX_RBBM_PERFCTR_CTL),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_CMD0,
+					A4XX_RBBM_PERFCTR_LOAD_CMD0),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_CMD1,
+					A4XX_RBBM_PERFCTR_LOAD_CMD1),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_CMD2,
+				A4XX_RBBM_PERFCTR_LOAD_CMD2),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_CMD3,
+				ADRENO_REG_SKIP),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_PWR_1_LO,
+					A4XX_RBBM_PERFCTR_PWR_1_LO),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_0_MASK, A4XX_RBBM_INT_0_MASK),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_0_STATUS, A4XX_RBBM_INT_0_STATUS),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_CLOCK_CTL, A4XX_RBBM_CLOCK_CTL),
+	ADRENO_REG_DEFINE(ADRENO_REG_VPC_DEBUG_RAM_SEL,
+					A4XX_VPC_DEBUG_RAM_SEL),
+	ADRENO_REG_DEFINE(ADRENO_REG_VPC_DEBUG_RAM_READ,
+					A4XX_VPC_DEBUG_RAM_READ),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_CLEAR_CMD,
+				A4XX_RBBM_INT_CLEAR_CMD),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_RBBM_CTL, A4XX_RBBM_RBBM_CTL),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SW_RESET_CMD, A4XX_RBBM_SW_RESET_CMD),
+	ADRENO_REG_DEFINE(ADRENO_REG_UCHE_INVALIDATE0, A4XX_UCHE_INVALIDATE0),
+	ADRENO_REG_DEFINE(ADRENO_REG_UCHE_INVALIDATE1, A4XX_UCHE_INVALIDATE1),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_LO,
+				A4XX_RBBM_PERFCTR_LOAD_VALUE_LO),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_HI,
+				A4XX_RBBM_PERFCTR_LOAD_VALUE_HI),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SECVID_TRUST_CONTROL,
+				A4XX_RBBM_SECVID_TRUST_CONTROL),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_ALWAYSON_COUNTER_LO,
+				A4XX_RBBM_ALWAYSON_COUNTER_LO),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_ALWAYSON_COUNTER_HI,
+				A4XX_RBBM_ALWAYSON_COUNTER_HI),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SECVID_TRUST_CONFIG,
+				A4XX_RBBM_SECVID_TRUST_CONFIG),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SECVID_TSB_CONTROL,
+				A4XX_RBBM_SECVID_TSB_CONTROL),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SECVID_TSB_TRUSTED_BASE,
+				A4XX_RBBM_SECVID_TSB_TRUSTED_BASE),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SECVID_TSB_TRUSTED_SIZE,
+				A4XX_RBBM_SECVID_TSB_TRUSTED_SIZE),
+	ADRENO_REG_DEFINE(ADRENO_REG_VBIF_XIN_HALT_CTRL0,
+				A4XX_VBIF_XIN_HALT_CTRL0),
+	ADRENO_REG_DEFINE(ADRENO_REG_VBIF_XIN_HALT_CTRL1,
+				A4XX_VBIF_XIN_HALT_CTRL1),
+	ADRENO_REG_DEFINE(ADRENO_REG_VBIF_VERSION,
+				A4XX_VBIF_VERSION),
+};
+
+static const struct adreno_reg_offsets a4xx_reg_offsets = {
+	.offsets = a4xx_register_offsets,
+	.offset_0 = ADRENO_REG_REGISTER_MAX,
+};
+
+static struct adreno_perfcount_register a4xx_perfcounters_cp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_0_LO,
+		A4XX_RBBM_PERFCTR_CP_0_HI, 0, A4XX_CP_PERFCTR_CP_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_1_LO,
+		A4XX_RBBM_PERFCTR_CP_1_HI, 1, A4XX_CP_PERFCTR_CP_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_2_LO,
+		A4XX_RBBM_PERFCTR_CP_2_HI, 2, A4XX_CP_PERFCTR_CP_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_3_LO,
+		A4XX_RBBM_PERFCTR_CP_3_HI, 3, A4XX_CP_PERFCTR_CP_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_4_LO,
+		A4XX_RBBM_PERFCTR_CP_4_HI, 4, A4XX_CP_PERFCTR_CP_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_5_LO,
+		A4XX_RBBM_PERFCTR_CP_5_HI, 5, A4XX_CP_PERFCTR_CP_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_6_LO,
+		A4XX_RBBM_PERFCTR_CP_6_HI, 6, A4XX_CP_PERFCTR_CP_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_7_LO,
+		A4XX_RBBM_PERFCTR_CP_7_HI, 7, A4XX_CP_PERFCTR_CP_SEL_7 },
+};
+
+/*
+ * Special list of CP perfcounter registers for A420 to account for hardware
+ * flaws.  This array replaces the default CP group during perfcounter init
+ */
+static struct adreno_perfcount_register a420_perfcounters_cp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_0_LO,
+		A4XX_RBBM_PERFCTR_CP_0_HI, 0, A4XX_CP_PERFCTR_CP_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_1_LO,
+		A4XX_RBBM_PERFCTR_CP_1_HI, 1, A4XX_CP_PERFCTR_CP_SEL_1 },
+	/*
+	 * The selector registers for 3, 5, and 7 are swizzled on the hardware.
+	 * CP_4 and CP_6 are duped to SEL_2 and SEL_3 so we don't enable them
+	 * here
+	 */
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_3_LO,
+		A4XX_RBBM_PERFCTR_CP_3_HI, 3, A4XX_CP_PERFCTR_CP_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_5_LO,
+		A4XX_RBBM_PERFCTR_CP_5_HI, 5, A4XX_CP_PERFCTR_CP_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CP_7_LO,
+		A4XX_RBBM_PERFCTR_CP_7_HI, 7, A4XX_CP_PERFCTR_CP_SEL_4 },
+};
+
+static struct adreno_perfcount_register a4xx_perfcounters_rbbm[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RBBM_0_LO,
+		A4XX_RBBM_PERFCTR_RBBM_0_HI, 8, A4XX_RBBM_PERFCTR_RBBM_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RBBM_1_LO,
+		A4XX_RBBM_PERFCTR_RBBM_1_HI, 9, A4XX_RBBM_PERFCTR_RBBM_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RBBM_2_LO,
+		A4XX_RBBM_PERFCTR_RBBM_2_HI, 10, A4XX_RBBM_PERFCTR_RBBM_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RBBM_3_LO,
+		A4XX_RBBM_PERFCTR_RBBM_3_HI, 11, A4XX_RBBM_PERFCTR_RBBM_SEL_3 },
+};
+
+static struct adreno_perfcount_register a4xx_perfcounters_pc[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_PC_0_LO,
+		A4XX_RBBM_PERFCTR_PC_0_HI, 12, A4XX_PC_PERFCTR_PC_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_PC_1_LO,
+		A4XX_RBBM_PERFCTR_PC_1_HI, 13, A4XX_PC_PERFCTR_PC_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_PC_2_LO,
+		A4XX_RBBM_PERFCTR_PC_2_HI, 14, A4XX_PC_PERFCTR_PC_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_PC_3_LO,
+		A4XX_RBBM_PERFCTR_PC_3_HI, 15, A4XX_PC_PERFCTR_PC_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_PC_4_LO,
+		A4XX_RBBM_PERFCTR_PC_4_HI, 16, A4XX_PC_PERFCTR_PC_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_PC_5_LO,
+		A4XX_RBBM_PERFCTR_PC_5_HI, 17, A4XX_PC_PERFCTR_PC_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_PC_6_LO,
+		A4XX_RBBM_PERFCTR_PC_6_HI, 18, A4XX_PC_PERFCTR_PC_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_PC_7_LO,
+		A4XX_RBBM_PERFCTR_PC_7_HI, 19, A4XX_PC_PERFCTR_PC_SEL_7 },
+};
+
+static struct adreno_perfcount_register a4xx_perfcounters_vfd[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VFD_0_LO,
+		A4XX_RBBM_PERFCTR_VFD_0_HI, 20, A4XX_VFD_PERFCTR_VFD_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VFD_1_LO,
+		A4XX_RBBM_PERFCTR_VFD_1_HI, 21, A4XX_VFD_PERFCTR_VFD_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VFD_2_LO,
+		A4XX_RBBM_PERFCTR_VFD_2_HI, 22, A4XX_VFD_PERFCTR_VFD_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VFD_3_LO,
+		A4XX_RBBM_PERFCTR_VFD_3_HI, 23, A4XX_VFD_PERFCTR_VFD_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VFD_4_LO,
+		A4XX_RBBM_PERFCTR_VFD_4_HI, 24, A4XX_VFD_PERFCTR_VFD_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VFD_5_LO,
+		A4XX_RBBM_PERFCTR_VFD_5_HI, 25, A4XX_VFD_PERFCTR_VFD_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VFD_6_LO,
+		A4XX_RBBM_PERFCTR_VFD_6_HI, 26, A4XX_VFD_PERFCTR_VFD_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VFD_7_LO,
+		A4XX_RBBM_PERFCTR_VFD_7_HI, 27, A4XX_VFD_PERFCTR_VFD_SEL_7 },
+};
+
+static struct adreno_perfcount_register a4xx_perfcounters_hlsq[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_HLSQ_0_LO,
+		A4XX_RBBM_PERFCTR_HLSQ_0_HI, 28, A4XX_HLSQ_PERFCTR_HLSQ_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_HLSQ_1_LO,
+		A4XX_RBBM_PERFCTR_HLSQ_1_HI, 29, A4XX_HLSQ_PERFCTR_HLSQ_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_HLSQ_2_LO,
+		A4XX_RBBM_PERFCTR_HLSQ_2_HI, 30, A4XX_HLSQ_PERFCTR_HLSQ_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_HLSQ_3_LO,
+		A4XX_RBBM_PERFCTR_HLSQ_3_HI, 31, A4XX_HLSQ_PERFCTR_HLSQ_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_HLSQ_4_LO,
+		A4XX_RBBM_PERFCTR_HLSQ_4_HI, 32, A4XX_HLSQ_PERFCTR_HLSQ_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_HLSQ_5_LO,
+		A4XX_RBBM_PERFCTR_HLSQ_5_HI, 33, A4XX_HLSQ_PERFCTR_HLSQ_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_HLSQ_6_LO,
+		A4XX_RBBM_PERFCTR_HLSQ_6_HI, 34, A4XX_HLSQ_PERFCTR_HLSQ_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_HLSQ_7_LO,
+		A4XX_RBBM_PERFCTR_HLSQ_7_HI, 35, A4XX_HLSQ_PERFCTR_HLSQ_SEL_7 },
+};
+
+static struct adreno_perfcount_register a4xx_perfcounters_vpc[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VPC_0_LO,
+		A4XX_RBBM_PERFCTR_VPC_0_HI, 36, A4XX_VPC_PERFCTR_VPC_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VPC_1_LO,
+		A4XX_RBBM_PERFCTR_VPC_1_HI, 37, A4XX_VPC_PERFCTR_VPC_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VPC_2_LO,
+		A4XX_RBBM_PERFCTR_VPC_2_HI, 38, A4XX_VPC_PERFCTR_VPC_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VPC_3_LO,
+		A4XX_RBBM_PERFCTR_VPC_3_HI, 39, A4XX_VPC_PERFCTR_VPC_SEL_3 },
+};
+
+static struct adreno_perfcount_register a4xx_perfcounters_ccu[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CCU_0_LO,
+		A4XX_RBBM_PERFCTR_CCU_0_HI, 40, A4XX_RB_PERFCTR_CCU_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CCU_1_LO,
+		A4XX_RBBM_PERFCTR_CCU_1_HI, 41, A4XX_RB_PERFCTR_CCU_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CCU_2_LO,
+		A4XX_RBBM_PERFCTR_CCU_2_HI, 42, A4XX_RB_PERFCTR_CCU_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_CCU_3_LO,
+		A4XX_RBBM_PERFCTR_CCU_3_HI, 43, A4XX_RB_PERFCTR_CCU_SEL_3 },
+};
+
+static struct adreno_perfcount_register a4xx_perfcounters_tse[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_TSE_0_LO,
+		A4XX_RBBM_PERFCTR_TSE_0_HI, 44, A4XX_GRAS_PERFCTR_TSE_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_TSE_1_LO,
+		A4XX_RBBM_PERFCTR_TSE_1_HI, 45, A4XX_GRAS_PERFCTR_TSE_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_TSE_2_LO,
+		A4XX_RBBM_PERFCTR_TSE_2_HI, 46, A4XX_GRAS_PERFCTR_TSE_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_TSE_3_LO,
+		A4XX_RBBM_PERFCTR_TSE_3_HI, 47, A4XX_GRAS_PERFCTR_TSE_SEL_3 },
+};
+
+static struct adreno_perfcount_register a4xx_perfcounters_ras[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RAS_0_LO,
+		A4XX_RBBM_PERFCTR_RAS_0_HI, 48, A4XX_GRAS_PERFCTR_RAS_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RAS_1_LO,
+		A4XX_RBBM_PERFCTR_RAS_1_HI, 49, A4XX_GRAS_PERFCTR_RAS_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RAS_2_LO,
+		A4XX_RBBM_PERFCTR_RAS_2_HI, 50, A4XX_GRAS_PERFCTR_RAS_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RAS_3_LO,
+		A4XX_RBBM_PERFCTR_RAS_3_HI, 51, A4XX_GRAS_PERFCTR_RAS_SEL_3 },
+};
+
+static struct adreno_perfcount_register a4xx_perfcounters_uche[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_UCHE_0_LO,
+		A4XX_RBBM_PERFCTR_UCHE_0_HI, 52, A4XX_UCHE_PERFCTR_UCHE_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_UCHE_1_LO,
+		A4XX_RBBM_PERFCTR_UCHE_1_HI, 53, A4XX_UCHE_PERFCTR_UCHE_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_UCHE_2_LO,
+		A4XX_RBBM_PERFCTR_UCHE_2_HI, 54, A4XX_UCHE_PERFCTR_UCHE_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_UCHE_3_LO,
+		A4XX_RBBM_PERFCTR_UCHE_3_HI, 55, A4XX_UCHE_PERFCTR_UCHE_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_UCHE_4_LO,
+		A4XX_RBBM_PERFCTR_UCHE_4_HI, 56, A4XX_UCHE_PERFCTR_UCHE_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_UCHE_5_LO,
+		A4XX_RBBM_PERFCTR_UCHE_5_HI, 57, A4XX_UCHE_PERFCTR_UCHE_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_UCHE_6_LO,
+		A4XX_RBBM_PERFCTR_UCHE_6_HI, 58, A4XX_UCHE_PERFCTR_UCHE_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_UCHE_7_LO,
+		A4XX_RBBM_PERFCTR_UCHE_7_HI, 59, A4XX_UCHE_PERFCTR_UCHE_SEL_7 },
+};
+
+static struct adreno_perfcount_register a4xx_perfcounters_tp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_TP_0_LO,
+		A4XX_RBBM_PERFCTR_TP_0_HI, 60, A4XX_TPL1_PERFCTR_TP_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_TP_1_LO,
+		A4XX_RBBM_PERFCTR_TP_1_HI, 61, A4XX_TPL1_PERFCTR_TP_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_TP_2_LO,
+		A4XX_RBBM_PERFCTR_TP_2_HI, 62, A4XX_TPL1_PERFCTR_TP_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_TP_3_LO,
+		A4XX_RBBM_PERFCTR_TP_3_HI, 63, A4XX_TPL1_PERFCTR_TP_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_TP_4_LO,
+		A4XX_RBBM_PERFCTR_TP_4_HI, 64, A4XX_TPL1_PERFCTR_TP_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_TP_5_LO,
+		A4XX_RBBM_PERFCTR_TP_5_HI, 65, A4XX_TPL1_PERFCTR_TP_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_TP_6_LO,
+		A4XX_RBBM_PERFCTR_TP_6_HI, 66, A4XX_TPL1_PERFCTR_TP_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_TP_7_LO,
+		A4XX_RBBM_PERFCTR_TP_7_HI, 67, A4XX_TPL1_PERFCTR_TP_SEL_7 },
+};
+
+static struct adreno_perfcount_register a4xx_perfcounters_sp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_SP_0_LO,
+		A4XX_RBBM_PERFCTR_SP_0_HI, 68, A4XX_SP_PERFCTR_SP_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_SP_1_LO,
+		A4XX_RBBM_PERFCTR_SP_1_HI, 69, A4XX_SP_PERFCTR_SP_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_SP_2_LO,
+		A4XX_RBBM_PERFCTR_SP_2_HI, 70, A4XX_SP_PERFCTR_SP_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_SP_3_LO,
+		A4XX_RBBM_PERFCTR_SP_3_HI, 71, A4XX_SP_PERFCTR_SP_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_SP_4_LO,
+		A4XX_RBBM_PERFCTR_SP_4_HI, 72, A4XX_SP_PERFCTR_SP_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_SP_5_LO,
+		A4XX_RBBM_PERFCTR_SP_5_HI, 73, A4XX_SP_PERFCTR_SP_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_SP_6_LO,
+		A4XX_RBBM_PERFCTR_SP_6_HI, 74, A4XX_SP_PERFCTR_SP_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_SP_7_LO,
+		A4XX_RBBM_PERFCTR_SP_7_HI, 75, A4XX_SP_PERFCTR_SP_SEL_7 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_SP_8_LO,
+		A4XX_RBBM_PERFCTR_SP_8_HI, 76, A4XX_SP_PERFCTR_SP_SEL_8 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_SP_9_LO,
+		A4XX_RBBM_PERFCTR_SP_9_HI, 77, A4XX_SP_PERFCTR_SP_SEL_9 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_SP_10_LO,
+		A4XX_RBBM_PERFCTR_SP_10_HI, 78, A4XX_SP_PERFCTR_SP_SEL_10 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_SP_11_LO,
+		A4XX_RBBM_PERFCTR_SP_11_HI, 79, A4XX_SP_PERFCTR_SP_SEL_11 },
+};
+
+static struct adreno_perfcount_register a4xx_perfcounters_rb[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RB_0_LO,
+		A4XX_RBBM_PERFCTR_RB_0_HI, 80, A4XX_RB_PERFCTR_RB_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RB_1_LO,
+		A4XX_RBBM_PERFCTR_RB_1_HI, 81, A4XX_RB_PERFCTR_RB_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RB_2_LO,
+		A4XX_RBBM_PERFCTR_RB_2_HI, 82, A4XX_RB_PERFCTR_RB_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RB_3_LO,
+		A4XX_RBBM_PERFCTR_RB_3_HI, 83, A4XX_RB_PERFCTR_RB_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RB_4_LO,
+		A4XX_RBBM_PERFCTR_RB_4_HI, 84, A4XX_RB_PERFCTR_RB_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RB_5_LO,
+		A4XX_RBBM_PERFCTR_RB_5_HI, 85, A4XX_RB_PERFCTR_RB_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RB_6_LO,
+		A4XX_RBBM_PERFCTR_RB_6_HI, 86, A4XX_RB_PERFCTR_RB_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_RB_7_LO,
+		A4XX_RBBM_PERFCTR_RB_7_HI, 87, A4XX_RB_PERFCTR_RB_SEL_7 },
+};
+
+static struct adreno_perfcount_register a4xx_perfcounters_vsc[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VSC_0_LO,
+		A4XX_RBBM_PERFCTR_VSC_0_HI, 88, A4XX_VSC_PERFCTR_VSC_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_VSC_1_LO,
+		A4XX_RBBM_PERFCTR_VSC_1_HI, 89, A4XX_VSC_PERFCTR_VSC_SEL_1 },
+};
+
+static struct adreno_perfcount_register a4xx_perfcounters_pwr[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_PWR_0_LO,
+		A4XX_RBBM_PERFCTR_PWR_0_HI, -1, 0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_PERFCTR_PWR_1_LO,
+		A4XX_RBBM_PERFCTR_PWR_1_HI, -1, 0},
+};
+
+static struct adreno_perfcount_register a4xx_perfcounters_vbif[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_VBIF_PERF_CNT_LOW0,
+		A4XX_VBIF_PERF_CNT_HIGH0, -1, A4XX_VBIF_PERF_CNT_SEL0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_VBIF_PERF_CNT_LOW1,
+		A4XX_VBIF_PERF_CNT_HIGH1, -1, A4XX_VBIF_PERF_CNT_SEL1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_VBIF_PERF_CNT_LOW2,
+		A4XX_VBIF_PERF_CNT_HIGH2, -1, A4XX_VBIF_PERF_CNT_SEL2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_VBIF_PERF_CNT_LOW3,
+		A4XX_VBIF_PERF_CNT_HIGH3, -1, A4XX_VBIF_PERF_CNT_SEL3 },
+};
+
+static struct adreno_perfcount_register a4xx_perfcounters_vbif_pwr[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_VBIF_PERF_PWR_CNT_LOW0,
+		A4XX_VBIF_PERF_PWR_CNT_HIGH0, -1, A4XX_VBIF_PERF_PWR_CNT_EN0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_VBIF_PERF_PWR_CNT_LOW1,
+		A4XX_VBIF_PERF_PWR_CNT_HIGH1, -1, A4XX_VBIF_PERF_PWR_CNT_EN1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_VBIF_PERF_PWR_CNT_LOW2,
+		A4XX_VBIF_PERF_PWR_CNT_HIGH2, -1, A4XX_VBIF_PERF_PWR_CNT_EN2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_VBIF_PERF_PWR_CNT_LOW3,
+		A4XX_VBIF_PERF_PWR_CNT_HIGH3, -1, A4XX_VBIF_PERF_PWR_CNT_EN3 },
+};
+
+static struct adreno_perfcount_register a4xx_perfcounters_alwayson[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A4XX_RBBM_ALWAYSON_COUNTER_LO,
+		A4XX_RBBM_ALWAYSON_COUNTER_HI, -1 },
+};
+
+#define A4XX_PERFCOUNTER_GROUP(offset, name) \
+	ADRENO_PERFCOUNTER_GROUP(a4xx, offset, name)
+
+#define A4XX_PERFCOUNTER_GROUP_FLAGS(offset, name, flags) \
+	ADRENO_PERFCOUNTER_GROUP_FLAGS(a4xx, offset, name, flags)
+
+static struct adreno_perfcount_group a4xx_perfcounter_groups
+				[KGSL_PERFCOUNTER_GROUP_MAX] = {
+	A4XX_PERFCOUNTER_GROUP(CP, cp),
+	A4XX_PERFCOUNTER_GROUP(RBBM, rbbm),
+	A4XX_PERFCOUNTER_GROUP(PC, pc),
+	A4XX_PERFCOUNTER_GROUP(VFD, vfd),
+	A4XX_PERFCOUNTER_GROUP(HLSQ, hlsq),
+	A4XX_PERFCOUNTER_GROUP(VPC, vpc),
+	A4XX_PERFCOUNTER_GROUP(CCU, ccu),
+	A4XX_PERFCOUNTER_GROUP(TSE, tse),
+	A4XX_PERFCOUNTER_GROUP(RAS, ras),
+	A4XX_PERFCOUNTER_GROUP(UCHE, uche),
+	A4XX_PERFCOUNTER_GROUP(TP, tp),
+	A4XX_PERFCOUNTER_GROUP(SP, sp),
+	A4XX_PERFCOUNTER_GROUP(RB, rb),
+	A4XX_PERFCOUNTER_GROUP(VSC, vsc),
+	A4XX_PERFCOUNTER_GROUP_FLAGS(PWR, pwr,
+		ADRENO_PERFCOUNTER_GROUP_FIXED),
+	A4XX_PERFCOUNTER_GROUP(VBIF, vbif),
+	A4XX_PERFCOUNTER_GROUP_FLAGS(VBIF_PWR, vbif_pwr,
+		ADRENO_PERFCOUNTER_GROUP_FIXED),
+	A4XX_PERFCOUNTER_GROUP_FLAGS(ALWAYSON, alwayson,
+		ADRENO_PERFCOUNTER_GROUP_FIXED),
+};
+
+static struct adreno_perfcounters a4xx_perfcounters = {
+	a4xx_perfcounter_groups,
+	ARRAY_SIZE(a4xx_perfcounter_groups),
+};
+
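+/* Perf counters sampled by the adreno fault tolerance (ft) code */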
+static struct adreno_ft_perf_counters a4xx_ft_perf_counters[] = {
+	{KGSL_PERFCOUNTER_GROUP_SP, A4XX_SP_ALU_ACTIVE_CYCLES},
+	{KGSL_PERFCOUNTER_GROUP_SP, A4XX_SP0_ICL1_MISSES},
+	{KGSL_PERFCOUNTER_GROUP_SP, A4XX_SP_FS_CFLOW_INSTRUCTIONS},
+	{KGSL_PERFCOUNTER_GROUP_TSE, A4XX_TSE_INPUT_PRIM_NUM},
+};
+
+/*
+ * On A420 a number of perfcounters are unusable. The following arrays define
+ * the countables that do not work and should not be used
+ */
+static const unsigned int a420_pc_invalid_countables[] = {
+	PC_INSTANCES, PC_VERTEX_HITS, PC_GENERATED_FIBERS, PC_GENERATED_WAVES,
+};
+
+static const unsigned int a420_vfd_invalid_countables[] = {
+	VFD_VPC_BYPASS_TRANS, VFD_UPPER_SHADER_FIBERS, VFD_LOWER_SHADER_FIBERS,
+};
+
+static const unsigned int a420_hlsq_invalid_countables[] = {
+	HLSQ_SP_VS_STAGE_CONSTANT, HLSQ_SP_VS_STAGE_INSTRUCTIONS,
+	HLSQ_SP_FS_STAGE_CONSTANT, HLSQ_SP_FS_STAGE_INSTRUCTIONS,
+	HLSQ_FS_STAGE_16_WAVES, HLSQ_FS_STAGE_32_WAVES, HLSQ_FS_STAGE_64_WAVES,
+	HLSQ_VS_STAGE_16_WAVES, HLSQ_VS_STAGE_32_WAVES,
+};
+
+static const unsigned int a420_uche_invalid_countables[] = {
+	UCHE_READ_REQUESTS_MARB, UCHE_READ_REQUESTS_SP,
+	UCHE_WRITE_REQUESTS_MARB, UCHE_WRITE_REQUESTS_SP,
+	UCHE_WRITE_REQUESTS_VPC
+};
+
+static const unsigned int a420_tp_invalid_countables[] = {
+	TP_OUTPUT_TEXELS_POINT, TP_OUTPUT_TEXELS_BILINEAR, TP_OUTPUT_TEXELS_MIP,
+	TP_OUTPUT_TEXELS_ANISO, TP_OUTPUT_TEXELS_OPS16, TP_OUTPUT_TEXELS_OPS32,
+	TP_ZERO_LOD, TP_LATENCY, TP_LATENCY_TRANS,
+};
+
+static const unsigned int a420_sp_invalid_countables[] = {
+	SP_FS_STAGE_BARY_INSTRUCTIONS,
+};
+
+static const unsigned int a420_rb_invalid_countables[] = {
+	RB_VALID_SAMPLES, RB_Z_FAIL, RB_S_FAIL,
+};
+
+static const unsigned int a420_ccu_invalid_countables[] = {
+	CCU_VBIF_STALL, CCU_VBIF_LATENCY_CYCLES, CCU_VBIF_LATENCY_SAMPLES,
+	CCU_Z_READ, CCU_Z_WRITE, CCU_C_READ, CCU_C_WRITE,
+};
+
+static const struct adreno_invalid_countables
+	a420_perfctr_invalid_countables[KGSL_PERFCOUNTER_GROUP_MAX] = {
+	ADRENO_PERFCOUNTER_INVALID_COUNTABLE(a420_pc, PC),
+	ADRENO_PERFCOUNTER_INVALID_COUNTABLE(a420_vfd, VFD),
+	ADRENO_PERFCOUNTER_INVALID_COUNTABLE(a420_hlsq, HLSQ),
+	ADRENO_PERFCOUNTER_INVALID_COUNTABLE(a420_tp, TP),
+	ADRENO_PERFCOUNTER_INVALID_COUNTABLE(a420_sp, SP),
+	ADRENO_PERFCOUNTER_INVALID_COUNTABLE(a420_rb, RB),
+	ADRENO_PERFCOUNTER_INVALID_COUNTABLE(a420_ccu, CCU),
+	ADRENO_PERFCOUNTER_INVALID_COUNTABLE(a420_uche, UCHE),
+};
+
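+/* Debug bus registers exposed via the coresight interface defined below */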
+static struct adreno_coresight_register a4xx_coresight_registers[] = {
+	{ A4XX_RBBM_CFG_DEBBUS_CTLT },
+	{ A4XX_RBBM_CFG_DEBBUS_SEL_A },
+	{ A4XX_RBBM_CFG_DEBBUS_SEL_B },
+	{ A4XX_RBBM_CFG_DEBBUS_SEL_C },
+	{ A4XX_RBBM_CFG_DEBBUS_SEL_D },
+	{ A4XX_RBBM_CFG_DEBBUS_OPL },
+	{ A4XX_RBBM_CFG_DEBBUS_OPE },
+	{ A4XX_RBBM_CFG_DEBBUS_IVTL_0 },
+	{ A4XX_RBBM_CFG_DEBBUS_IVTL_1 },
+	{ A4XX_RBBM_CFG_DEBBUS_IVTL_2 },
+	{ A4XX_RBBM_CFG_DEBBUS_IVTL_3 },
+	{ A4XX_RBBM_CFG_DEBBUS_MASKL_0 },
+	{ A4XX_RBBM_CFG_DEBBUS_MASKL_1 },
+	{ A4XX_RBBM_CFG_DEBBUS_MASKL_2 },
+	{ A4XX_RBBM_CFG_DEBBUS_MASKL_3 },
+	{ A4XX_RBBM_CFG_DEBBUS_BYTEL_0 },
+	{ A4XX_RBBM_CFG_DEBBUS_BYTEL_1 },
+	{ A4XX_RBBM_CFG_DEBBUS_IVTE_0 },
+	{ A4XX_RBBM_CFG_DEBBUS_IVTE_1 },
+	{ A4XX_RBBM_CFG_DEBBUS_IVTE_2 },
+	{ A4XX_RBBM_CFG_DEBBUS_IVTE_3 },
+	{ A4XX_RBBM_CFG_DEBBUS_MASKE_0 },
+	{ A4XX_RBBM_CFG_DEBBUS_MASKE_1 },
+	{ A4XX_RBBM_CFG_DEBBUS_MASKE_2 },
+	{ A4XX_RBBM_CFG_DEBBUS_MASKE_3 },
+	{ A4XX_RBBM_CFG_DEBBUS_NIBBLEE },
+	{ A4XX_RBBM_CFG_DEBBUS_PTRC0 },
+	{ A4XX_RBBM_CFG_DEBBUS_PTRC1 },
+	{ A4XX_RBBM_CFG_DEBBUS_CLRC },
+	{ A4XX_RBBM_CFG_DEBBUS_LOADIVT },
+	{ A4XX_RBBM_CFG_DEBBUS_IDX },
+	{ A4XX_RBBM_CFG_DEBBUS_LOADREG },
+	{ A4XX_RBBM_EXT_TRACE_BUS_CTL },
+	{ A4XX_RBBM_CFG_DEBBUS_CTLM },
+};
+
+static void a4xx_perfcounter_init(struct adreno_device *adreno_dev)
+{
+	if (adreno_is_a420(adreno_dev)) {
+		struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+		struct adreno_perfcounters *counters = gpudev->perfcounters;
+
+		/*
+		 * The CP counters on A420 are... special.  Some of the counters
+		 * are swizzled so only a subset of them is usable
+		 */
+
+		if (counters != NULL) {
+			counters->groups[KGSL_PERFCOUNTER_GROUP_CP].regs =
+				a420_perfcounters_cp;
+			counters->groups[KGSL_PERFCOUNTER_GROUP_CP].reg_count =
+				ARRAY_SIZE(a420_perfcounters_cp);
+		}
+
+		/*
+		 * Also on A420 a number of the countables are not functional so
+		 * we maintain a blacklist of countables to protect the user
+		 */
+
+		gpudev->invalid_countables = a420_perfctr_invalid_countables;
+	}
+
+	/*
+	 * Enable the GPU busy count counter. This is a fixed counter on
+	 * A4XX so we don't need to bother checking the return value
+	 */
+
+	adreno_perfcounter_get(adreno_dev, KGSL_PERFCOUNTER_GROUP_PWR, 1,
+		NULL, NULL, PERFCOUNTER_FLAG_KERNEL);
+}
+
+static void a4xx_perfcounter_close(struct adreno_device *adreno_dev)
+{
+	adreno_perfcounter_put(adreno_dev, KGSL_PERFCOUNTER_GROUP_PWR, 1,
+		PERFCOUNTER_FLAG_KERNEL);
+}
+
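+/*
+ * Shader instructions for the power-on fixup workaround.  They are copied
+ * into the IB built by _a4xx_pwron_fixup() via a CP_LOAD_STATE packet.
+ */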
+static const unsigned int _a4xx_pwron_fixup_fs_instructions[] = {
+	0x00000000, 0x304CC300, 0x00000000, 0x304CC304,
+	0x00000000, 0x304CC308, 0x00000000, 0x304CC30C,
+	0x00000000, 0x304CC310, 0x00000000, 0x304CC314,
+	0x00000000, 0x304CC318, 0x00000000, 0x304CC31C,
+	0x00000000, 0x304CC320, 0x00000000, 0x304CC324,
+	0x00000000, 0x304CC328, 0x00000000, 0x304CC32C,
+	0x00000000, 0x304CC330, 0x00000000, 0x304CC334,
+	0x00000000, 0x304CC338, 0x00000000, 0x304CC33C,
+	0x00000000, 0x00000400, 0x00020000, 0x63808003,
+	0x00060004, 0x63828007, 0x000A0008, 0x6384800B,
+	0x000E000C, 0x6386800F, 0x00120010, 0x63888013,
+	0x00160014, 0x638A8017, 0x001A0018, 0x638C801B,
+	0x001E001C, 0x638E801F, 0x00220020, 0x63908023,
+	0x00260024, 0x63928027, 0x002A0028, 0x6394802B,
+	0x002E002C, 0x6396802F, 0x00320030, 0x63988033,
+	0x00360034, 0x639A8037, 0x003A0038, 0x639C803B,
+	0x003E003C, 0x639E803F, 0x00000000, 0x00000400,
+	0x00000003, 0x80D00003, 0x00000007, 0x80D00007,
+	0x0000000B, 0x80D0000B, 0x0000000F, 0x80D0000F,
+	0x00000013, 0x80D00013, 0x00000017, 0x80D00017,
+	0x0000001B, 0x80D0001B, 0x0000001F, 0x80D0001F,
+	0x00000023, 0x80D00023, 0x00000027, 0x80D00027,
+	0x0000002B, 0x80D0002B, 0x0000002F, 0x80D0002F,
+	0x00000033, 0x80D00033, 0x00000037, 0x80D00037,
+	0x0000003B, 0x80D0003B, 0x0000003F, 0x80D0003F,
+	0x00000000, 0x00000400, 0xFFFFFFFF, 0x304CC300,
+	0xFFFFFFFF, 0x304CC304, 0xFFFFFFFF, 0x304CC308,
+	0xFFFFFFFF, 0x304CC30C, 0xFFFFFFFF, 0x304CC310,
+	0xFFFFFFFF, 0x304CC314, 0xFFFFFFFF, 0x304CC318,
+	0xFFFFFFFF, 0x304CC31C, 0xFFFFFFFF, 0x304CC320,
+	0xFFFFFFFF, 0x304CC324, 0xFFFFFFFF, 0x304CC328,
+	0xFFFFFFFF, 0x304CC32C, 0xFFFFFFFF, 0x304CC330,
+	0xFFFFFFFF, 0x304CC334, 0xFFFFFFFF, 0x304CC338,
+	0xFFFFFFFF, 0x304CC33C, 0x00000000, 0x00000400,
+	0x00020000, 0x63808003, 0x00060004, 0x63828007,
+	0x000A0008, 0x6384800B, 0x000E000C, 0x6386800F,
+	0x00120010, 0x63888013, 0x00160014, 0x638A8017,
+	0x001A0018, 0x638C801B, 0x001E001C, 0x638E801F,
+	0x00220020, 0x63908023, 0x00260024, 0x63928027,
+	0x002A0028, 0x6394802B, 0x002E002C, 0x6396802F,
+	0x00320030, 0x63988033, 0x00360034, 0x639A8037,
+	0x003A0038, 0x639C803B, 0x003E003C, 0x639E803F,
+	0x00000000, 0x00000400, 0x00000003, 0x80D00003,
+	0x00000007, 0x80D00007, 0x0000000B, 0x80D0000B,
+	0x0000000F, 0x80D0000F, 0x00000013, 0x80D00013,
+	0x00000017, 0x80D00017, 0x0000001B, 0x80D0001B,
+	0x0000001F, 0x80D0001F, 0x00000023, 0x80D00023,
+	0x00000027, 0x80D00027, 0x0000002B, 0x80D0002B,
+	0x0000002F, 0x80D0002F, 0x00000033, 0x80D00033,
+	0x00000037, 0x80D00037, 0x0000003B, 0x80D0003B,
+	0x0000003F, 0x80D0003F, 0x00000000, 0x03000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+/**
+ * _a4xx_pwron_fixup() - Initialize a special command buffer to run a
+ * post-power collapse shader workaround
+ * @adreno_dev: Pointer to an adreno_device struct
+ *
+ * Some targets require a special workaround shader to be executed after
+ * power-collapse.  Construct the IB once at init time and keep it
+ * handy
+ *
+ * Returns: 0 on success or negative on error
+ */
+static int _a4xx_pwron_fixup(struct adreno_device *adreno_dev)
+{
+	unsigned int *cmds;
+	unsigned int count = ARRAY_SIZE(_a4xx_pwron_fixup_fs_instructions);
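+	/* The shader program is loaded in units of 32 dwords (count / 32) */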
+	unsigned int num_units = count >> 5;
+	int ret;
+
+	/* Return if the fixup is already in place */
+	if (test_bit(ADRENO_DEVICE_PWRON_FIXUP, &adreno_dev->priv))
+		return 0;
+
+	ret = kgsl_allocate_global(KGSL_DEVICE(adreno_dev),
+		&adreno_dev->pwron_fixup, PAGE_SIZE,
+		KGSL_MEMFLAGS_GPUREADONLY, 0, "pwron_fixup");
+
+	if (ret)
+		return ret;
+
+	cmds = adreno_dev->pwron_fixup.hostptr;
+
+	*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A4XX_SP_MODE_CONTROL, 1);
+	*cmds++ = 0x00000018;
+	*cmds++ = cp_type0_packet(A4XX_TPL1_TP_MODE_CONTROL, 1);
+	*cmds++ = 0x00000002;
+	*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A4xx_HLSQ_CONTROL_0, 5);
+	*cmds++ = 0x800001a0;
+	*cmds++ = 0xfcfc0000;
+	*cmds++ = 0xcff3f3f0;
+	*cmds++ = 0xfcfcfcfc;
+	*cmds++ = 0xccfcfcfc;
+	*cmds++ = cp_type0_packet(A4XX_SP_FS_CTRL_1, 1);
+	*cmds++ = 0x80000000;
+	*cmds++ = cp_type0_packet(A4XX_HLSQ_UPDATE_CONTROL, 1);
+	*cmds++ = 0x00000038;
+	*cmds++ = cp_type0_packet(A4XX_HLSQ_MODE_CONTROL, 1);
+	*cmds++ = 0x00000003;
+	*cmds++ = cp_type0_packet(A4XX_HLSQ_UPDATE_CONTROL, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A4XX_TPL1_TP_TEX_TSIZE_1, 1);
+	*cmds++ = 0x00008000;
+	*cmds++ = cp_type0_packet(A4xx_HLSQ_CONTROL_0, 2);
+	*cmds++ = 0x800001a0;
+	*cmds++ = 0xfcfc0000;
+	*cmds++ = cp_type0_packet(A4XX_HLSQ_CS_CONTROL, 1);
+	*cmds++ = 0x00018030 | (num_units << 24);
+	*cmds++ = cp_type0_packet(A4XX_HLSQ_CL_NDRANGE_0, 7);
+	*cmds++ = 0x000000fd;
+	*cmds++ = 0x00000040;
+	*cmds++ = 0x00000000;
+	*cmds++ = 0x00000001;
+	*cmds++ = 0x00000000;
+	*cmds++ = 0x00000001;
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A4XX_HLSQ_CL_CONTROL_0, 2);
+	*cmds++ = 0x0001201f;
+	*cmds++ = 0x0000f003;
+	*cmds++ = cp_type0_packet(A4XX_HLSQ_CL_KERNEL_CONST, 1);
+	*cmds++ = 0x0001800b;
+	*cmds++ = cp_type0_packet(A4XX_HLSQ_CL_KERNEL_GROUP_X, 3);
+	*cmds++ = 0x00000001;
+	*cmds++ = 0x00000001;
+	*cmds++ = 0x00000001;
+	*cmds++ = cp_type0_packet(A4XX_HLSQ_CL_WG_OFFSET, 1);
+	*cmds++ = 0x00000022;
+	*cmds++ = cp_type0_packet(A4XX_UCHE_INVALIDATE0, 2);
+	*cmds++ = 0x00000000;
+	*cmds++ = 0x00000012;
+	*cmds++ = cp_type0_packet(A4XX_HLSQ_MODE_CONTROL, 1);
+	*cmds++ = 0x00000003;
+	*cmds++ = cp_type0_packet(A4XX_SP_SP_CTRL, 1);
+	*cmds++ = 0x00920000;
+	*cmds++ = cp_type0_packet(A4XX_SP_INSTR_CACHE_CTRL, 1);
+	*cmds++ = 0x00000260;
+	*cmds++ = cp_type0_packet(A4XX_SP_CS_CTRL_0, 1);
+	*cmds++ = 0x00200400;
+	*cmds++ = cp_type0_packet(A4XX_SP_CS_OBJ_OFFSET, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A4XX_SP_CS_OBJ_START, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type0_packet(A4XX_SP_CS_LENGTH, 1);
+	*cmds++ = num_units;
+	*cmds++ = cp_type0_packet(A4XX_SP_MODE_CONTROL, 1);
+	*cmds++ = 0x00000018;
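+	/* Load the workaround shader instructions; the payload is copied in below */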
+	*cmds++ = cp_type3_packet(CP_LOAD_STATE, 2 + count);
+	*cmds++ = 0x00340000 | (num_units << CP_LOADSTATE_NUMOFUNITS_SHIFT);
+	*cmds++ = 0x00000000;
+
+	memcpy(cmds, _a4xx_pwron_fixup_fs_instructions, count << 2);
+	cmds += count;
+
+	*cmds++ = cp_type3_packet(CP_EXEC_CL, 1);
+	*cmds++ = 0x00000000;
+	*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+	*cmds++ = 0x00000000;
+
+	/*
+	 * Remember the number of dwords in the command buffer for when we
+	 * program the indirect buffer call in the ringbuffer
+	 */
+	adreno_dev->pwron_fixup_dwords =
+		(cmds - (unsigned int *) adreno_dev->pwron_fixup.hostptr);
+
+	/* Mark the flag in ->priv to show that we have the fix */
+	set_bit(ADRENO_DEVICE_PWRON_FIXUP, &adreno_dev->priv);
+	return 0;
+}
+
+/*
+ * a4xx_init() - Initialize gpu specific data
+ * @adreno_dev: Pointer to adreno device
+ */
+static void a4xx_init(struct adreno_device *adreno_dev)
+{
+	if (adreno_is_a405(adreno_dev) || adreno_is_a420(adreno_dev))
+		_a4xx_pwron_fixup(adreno_dev);
+}
+
+static int a4xx_send_me_init(struct adreno_device *adreno_dev,
+			 struct adreno_ringbuffer *rb)
+{
+	unsigned int *cmds;
+	int ret;
+
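+	/* 18 dwords for CP_ME_INIT plus 2 for CP_PREEMPT_ENABLE */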
+	cmds = adreno_ringbuffer_allocspace(rb, 20);
+	if (IS_ERR(cmds))
+		return PTR_ERR(cmds);
+	if (cmds == NULL)
+		return -ENOSPC;
+
+	*cmds++ = cp_type3_packet(CP_ME_INIT, 17);
+
+	/*
+	 * Ordinal 2 of the ME_INIT packet.  The bits set here indicate that
+	 * ordinals 3, 4, 5-12, 14, 15, 16, 17 and 18 are present and that
+	 * Microcode Default Reset Control = 3
+	 */
+	*cmds++ = 0x000003f7;
+	*cmds++ = 0x00000000;
+	*cmds++ = 0x00000000;
+	*cmds++ = 0x00000000;
+	*cmds++ = 0x00000080;
+	*cmds++ = 0x00000100;
+	*cmds++ = 0x00000180;
+	*cmds++ = 0x00006600;
+	*cmds++ = 0x00000150;
+	*cmds++ = 0x0000014e;
+	*cmds++ = 0x00000154;
+	/* MAX Context */
+	*cmds++ = 0x00000001;
+	*cmds++ = 0x00000000;
+	*cmds++ = 0x00000000;
+
+	/* Enable protected mode registers for A3XX/A4XX */
+	*cmds++ = 0x20000000;
+
+	*cmds++ = 0x00000000;
+	*cmds++ = 0x00000000;
+
+	*cmds++ = cp_type3_packet(CP_PREEMPT_ENABLE, 1);
+	*cmds++ = 1;
+
+	ret = adreno_ringbuffer_submit_spin(rb, NULL, 2000);
+	if (ret) {
+		struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+		dev_err(device->dev, "CP initialization failed to idle\n");
+		kgsl_device_snapshot(device, NULL);
+	}
+
+	return ret;
+}
+
+/*
+ * a4xx_rb_start() - Start the ringbuffer
+ * @adreno_dev: Pointer to adreno device
+ * @start_type: Warm or cold start
+ */
+static int a4xx_rb_start(struct adreno_device *adreno_dev,
+			 unsigned int start_type)
+{
+	struct adreno_ringbuffer *rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev);
+	struct kgsl_device *device = &adreno_dev->dev;
+	uint64_t addr;
+	int ret;
+
+	addr = SCRATCH_RPTR_GPU_ADDR(device, rb->id);
+
+	adreno_writereg64(adreno_dev, ADRENO_REG_CP_RB_RPTR_ADDR_LO,
+			ADRENO_REG_CP_RB_RPTR_ADDR_HI, addr);
+
+	/*
+	 * The size of the ringbuffer in the hardware is the log2
+	 * representation of the size in quadwords (sizedwords / 2).
+	 * Also disable the host RPTR shadow register as it might be unreliable
+	 * in certain circumstances.
+	 */
+
+	adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_CNTL,
+			((ilog2(4) << 8) & 0x1F00) |
+			(ilog2(KGSL_RB_DWORDS >> 1) & 0x3F));
+
+	adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_BASE,
+			  rb->buffer_desc.gpuaddr);
+
+	ret = a3xx_microcode_load(adreno_dev, start_type);
+	if (ret)
+		return ret;
+
+	/* clear ME_HALT to start micro engine */
+	adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_CNTL, 0);
+
+	ret = a4xx_send_me_init(adreno_dev, rb);
+	if (ret == 0) {
+		a4xx_enable_pc(adreno_dev);
+		a4xx_enable_ppd(adreno_dev);
+	}
+
+	return ret;
+}
+
+static ADRENO_CORESIGHT_ATTR(cfg_debbus_ctrlt, &a4xx_coresight_registers[0]);
+static ADRENO_CORESIGHT_ATTR(cfg_debbus_sela, &a4xx_coresight_registers[1]);
+static ADRENO_CORESIGHT_ATTR(cfg_debbus_selb, &a4xx_coresight_registers[2]);
+static ADRENO_CORESIGHT_ATTR(cfg_debbus_selc, &a4xx_coresight_registers[3]);
+static ADRENO_CORESIGHT_ATTR(cfg_debbus_seld, &a4xx_coresight_registers[4]);
+static ADRENO_CORESIGHT_ATTR(cfg_debbus_opl, &a4xx_coresight_registers[5]);
+static ADRENO_CORESIGHT_ATTR(cfg_debbus_ope, &a4xx_coresight_registers[6]);
+static ADRENO_CORESIGHT_ATTR(cfg_debbus_ivtl0, &a4xx_coresight_registers[7]);
+static ADRENO_CORESIGHT_ATTR(cfg_debbus_ivtl1, &a4xx_coresight_registers[8]);
+static ADRENO_CORESIGHT_ATTR(cfg_debbus_ivtl2, &a4xx_coresight_registers[9]);
+static ADRENO_CORESIGHT_ATTR(cfg_debbus_ivtl3, &a4xx_coresight_registers[10]);
+static ADRENO_CORESIGHT_ATTR(cfg_debbus_maskl0, &a4xx_coresight_registers[11]);
+static ADRENO_CORESIGHT_ATTR(cfg_debbus_maskl1, &a4xx_coresight_registers[12]);
+static ADRENO_CORESIGHT_ATTR(cfg_debbus_maskl2, &a4xx_coresight_registers[13]);
+static ADRENO_CORESIGHT_ATTR(cfg_debbus_maskl3, &a4xx_coresight_registers[14]);
+static ADRENO_CORESIGHT_ATTR(cfg_debbus_bytel0, &a4xx_coresight_registers[15]);
+static ADRENO_CORESIGHT_ATTR(cfg_debbus_bytel1, &a4xx_coresight_registers[16]);
+static ADRENO_CORESIGHT_ATTR(cfg_debbus_ivte0, &a4xx_coresight_registers[17]);
+static ADRENO_CORESIGHT_ATTR(cfg_debbus_ivte1, &a4xx_coresight_registers[18]);
+static ADRENO_CORESIGHT_ATTR(cfg_debbus_ivte2, &a4xx_coresight_registers[19]);
+static ADRENO_CORESIGHT_ATTR(cfg_debbus_ivte3, &a4xx_coresight_registers[20]);
+static ADRENO_CORESIGHT_ATTR(cfg_debbus_maske0, &a4xx_coresight_registers[21]);
+static ADRENO_CORESIGHT_ATTR(cfg_debbus_maske1, &a4xx_coresight_registers[22]);
+static ADRENO_CORESIGHT_ATTR(cfg_debbus_maske2, &a4xx_coresight_registers[23]);
+static ADRENO_CORESIGHT_ATTR(cfg_debbus_maske3, &a4xx_coresight_registers[24]);
+static ADRENO_CORESIGHT_ATTR(cfg_debbus_nibblee, &a4xx_coresight_registers[25]);
+static ADRENO_CORESIGHT_ATTR(cfg_debbus_ptrc0, &a4xx_coresight_registers[26]);
+static ADRENO_CORESIGHT_ATTR(cfg_debbus_ptrc1, &a4xx_coresight_registers[27]);
+static ADRENO_CORESIGHT_ATTR(cfg_debbus_clrc, &a4xx_coresight_registers[28]);
+static ADRENO_CORESIGHT_ATTR(cfg_debbus_loadivt, &a4xx_coresight_registers[29]);
+static ADRENO_CORESIGHT_ATTR(cfg_debbus_idx, &a4xx_coresight_registers[30]);
+static ADRENO_CORESIGHT_ATTR(cfg_debbus_loadreg, &a4xx_coresight_registers[31]);
+static ADRENO_CORESIGHT_ATTR(ext_tracebus_ctl, &a4xx_coresight_registers[32]);
+static ADRENO_CORESIGHT_ATTR(cfg_debbus_ctrlm, &a4xx_coresight_registers[33]);
+
+static struct attribute *a4xx_coresight_attrs[] = {
+	&coresight_attr_cfg_debbus_ctrlt.attr.attr,
+	&coresight_attr_cfg_debbus_sela.attr.attr,
+	&coresight_attr_cfg_debbus_selb.attr.attr,
+	&coresight_attr_cfg_debbus_selc.attr.attr,
+	&coresight_attr_cfg_debbus_seld.attr.attr,
+	&coresight_attr_cfg_debbus_opl.attr.attr,
+	&coresight_attr_cfg_debbus_ope.attr.attr,
+	&coresight_attr_cfg_debbus_ivtl0.attr.attr,
+	&coresight_attr_cfg_debbus_ivtl1.attr.attr,
+	&coresight_attr_cfg_debbus_ivtl2.attr.attr,
+	&coresight_attr_cfg_debbus_ivtl3.attr.attr,
+	&coresight_attr_cfg_debbus_maskl0.attr.attr,
+	&coresight_attr_cfg_debbus_maskl1.attr.attr,
+	&coresight_attr_cfg_debbus_maskl2.attr.attr,
+	&coresight_attr_cfg_debbus_maskl3.attr.attr,
+	&coresight_attr_cfg_debbus_bytel0.attr.attr,
+	&coresight_attr_cfg_debbus_bytel1.attr.attr,
+	&coresight_attr_cfg_debbus_ivte0.attr.attr,
+	&coresight_attr_cfg_debbus_ivte1.attr.attr,
+	&coresight_attr_cfg_debbus_ivte2.attr.attr,
+	&coresight_attr_cfg_debbus_ivte3.attr.attr,
+	&coresight_attr_cfg_debbus_maske0.attr.attr,
+	&coresight_attr_cfg_debbus_maske1.attr.attr,
+	&coresight_attr_cfg_debbus_maske2.attr.attr,
+	&coresight_attr_cfg_debbus_maske3.attr.attr,
+	&coresight_attr_cfg_debbus_nibblee.attr.attr,
+	&coresight_attr_cfg_debbus_ptrc0.attr.attr,
+	&coresight_attr_cfg_debbus_ptrc1.attr.attr,
+	&coresight_attr_cfg_debbus_clrc.attr.attr,
+	&coresight_attr_cfg_debbus_loadivt.attr.attr,
+	&coresight_attr_cfg_debbus_idx.attr.attr,
+	&coresight_attr_cfg_debbus_loadreg.attr.attr,
+	&coresight_attr_ext_tracebus_ctl.attr.attr,
+	&coresight_attr_cfg_debbus_ctrlm.attr.attr,
+	NULL,
+};
+
+static const struct attribute_group a4xx_coresight_group = {
+	.attrs = a4xx_coresight_attrs,
+};
+
+static const struct attribute_group *a4xx_coresight_groups[] = {
+	&a4xx_coresight_group,
+	NULL,
+};
+
+static struct adreno_coresight a4xx_coresight = {
+	.registers = a4xx_coresight_registers,
+	.count = ARRAY_SIZE(a4xx_coresight_registers),
+	.groups = a4xx_coresight_groups,
+};
+
+static void a4xx_preempt_callback(struct adreno_device *adreno_dev, int bit)
+{
+	if (atomic_read(&adreno_dev->preempt.state) != ADRENO_PREEMPT_TRIGGERED)
+		return;
+
+	trace_adreno_hw_preempt_trig_to_comp_int(adreno_dev->cur_rb,
+			      adreno_dev->next_rb,
+			      adreno_get_rptr(adreno_dev->cur_rb),
+			      adreno_get_rptr(adreno_dev->next_rb));
+
+	adreno_dispatcher_schedule(KGSL_DEVICE(adreno_dev));
+}
+
+#define A4XX_INT_MASK \
+	((1 << A4XX_INT_RBBM_AHB_ERROR) |		\
+	 (1 << A4XX_INT_RBBM_REG_TIMEOUT) |		\
+	 (1 << A4XX_INT_RBBM_ME_MS_TIMEOUT) |		\
+	 (1 << A4XX_INT_RBBM_PFP_MS_TIMEOUT) |		\
+	 (1 << A4XX_INT_RBBM_ETS_MS_TIMEOUT) |		\
+	 (1 << A4XX_INT_RBBM_ASYNC_OVERFLOW) |		\
+	 (1 << A4XX_INT_CP_SW) |			\
+	 (1 << A4XX_INT_CP_OPCODE_ERROR) |		\
+	 (1 << A4XX_INT_CP_RESERVED_BIT_ERROR) |	\
+	 (1 << A4XX_INT_CP_HW_FAULT) |			\
+	 (1 << A4XX_INT_CP_IB1_INT) |			\
+	 (1 << A4XX_INT_CP_IB2_INT) |			\
+	 (1 << A4XX_INT_CP_RB_INT) |			\
+	 (1 << A4XX_INT_CACHE_FLUSH_TS) |		\
+	 (1 << A4XX_INT_CP_REG_PROTECT_FAULT) |		\
+	 (1 << A4XX_INT_CP_AHB_ERROR_HALT) |		\
+	 (1 << A4XX_INT_RBBM_ATB_BUS_OVERFLOW) |	\
+	 (1 << A4XX_INT_UCHE_OOB_ACCESS) |		\
+	 (1 << A4XX_INT_RBBM_DPM_CALC_ERR) |		\
+	 (1 << A4XX_INT_RBBM_DPM_EPOCH_ERR) |		\
+	 (1 << A4XX_INT_RBBM_DPM_THERMAL_YELLOW_ERR) |\
+	 (1 << A4XX_INT_RBBM_DPM_THERMAL_RED_ERR))
+
+static struct adreno_irq_funcs a4xx_irq_funcs[32] = {
+	ADRENO_IRQ_CALLBACK(NULL),                   /* 0 - RBBM_GPU_IDLE */
+	ADRENO_IRQ_CALLBACK(a4xx_err_callback), /* 1 - RBBM_AHB_ERROR */
+	ADRENO_IRQ_CALLBACK(a4xx_err_callback), /* 2 - RBBM_REG_TIMEOUT */
+	/* 3 - RBBM_ME_MS_TIMEOUT */
+	ADRENO_IRQ_CALLBACK(a4xx_err_callback),
+	/* 4 - RBBM_PFP_MS_TIMEOUT */
+	ADRENO_IRQ_CALLBACK(a4xx_err_callback),
+	ADRENO_IRQ_CALLBACK(a4xx_err_callback), /* 5 - RBBM_ETS_MS_TIMEOUT */
+	/* 6 - RBBM_ATB_ASYNC_OVERFLOW */
+	ADRENO_IRQ_CALLBACK(a4xx_err_callback),
+	ADRENO_IRQ_CALLBACK(NULL), /* 7 - RBBM_GPC_ERR */
+	ADRENO_IRQ_CALLBACK(a4xx_preempt_callback), /* 8 - CP_SW */
+	ADRENO_IRQ_CALLBACK(a4xx_err_callback), /* 9 - CP_OPCODE_ERROR */
+	/* 10 - CP_RESERVED_BIT_ERROR */
+	ADRENO_IRQ_CALLBACK(a4xx_err_callback),
+	ADRENO_IRQ_CALLBACK(a4xx_err_callback), /* 11 - CP_HW_FAULT */
+	ADRENO_IRQ_CALLBACK(NULL), /* 12 - CP_DMA */
+	ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 13 - CP_IB2_INT */
+	ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 14 - CP_IB1_INT */
+	ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 15 - CP_RB_INT */
+	/* 16 - CP_REG_PROTECT_FAULT */
+	ADRENO_IRQ_CALLBACK(a4xx_err_callback),
+	ADRENO_IRQ_CALLBACK(NULL), /* 17 - CP_RB_DONE_TS */
+	ADRENO_IRQ_CALLBACK(NULL), /* 18 - CP_VS_DONE_TS */
+	ADRENO_IRQ_CALLBACK(NULL), /* 19 - CP_PS_DONE_TS */
+	ADRENO_IRQ_CALLBACK(adreno_cp_callback), /* 20 - CP_CACHE_FLUSH_TS */
+	/* 21 - CP_AHB_ERROR_FAULT */
+	ADRENO_IRQ_CALLBACK(a4xx_err_callback),
+	ADRENO_IRQ_CALLBACK(a4xx_err_callback), /* 22 - RBBM_ATB_BUS_OVERFLOW */
+	ADRENO_IRQ_CALLBACK(NULL), /* 23 - Unused */
+	/* 24 - MISC_HANG_DETECT */
+	ADRENO_IRQ_CALLBACK(adreno_hang_int_callback),
+	ADRENO_IRQ_CALLBACK(a4xx_err_callback), /* 25 - UCHE_OOB_ACCESS */
+	ADRENO_IRQ_CALLBACK(NULL), /* 26 - Unused */
+	ADRENO_IRQ_CALLBACK(NULL), /* 27 - RBBM_TRACE_MISR */
+	ADRENO_IRQ_CALLBACK(a4xx_err_callback), /* 28 - RBBM_DPM_CALC_ERR */
+	ADRENO_IRQ_CALLBACK(a4xx_err_callback), /* 29 - RBBM_DPM_EPOCH_ERR */
+	/* 30 - RBBM_DPM_THERMAL_YELLOW_ERR */
+	ADRENO_IRQ_CALLBACK(a4xx_err_callback),
+	/* 31 - RBBM_DPM_THERMAL_RED_ERR */
+	ADRENO_IRQ_CALLBACK(a4xx_err_callback),
+};
+
+static struct adreno_irq a4xx_irq = {
+	.funcs = a4xx_irq_funcs,
+	.mask = A4XX_INT_MASK,
+};
+
+static struct adreno_snapshot_data a4xx_snapshot_data = {
+	.sect_sizes = &a4xx_snap_sizes,
+};
+
+struct adreno_gpudev adreno_a4xx_gpudev = {
+	.reg_offsets = &a4xx_reg_offsets,
+	.int_bits = a4xx_int_bits,
+	.ft_perf_counters = a4xx_ft_perf_counters,
+	.ft_perf_counters_count = ARRAY_SIZE(a4xx_ft_perf_counters),
+	.perfcounters = &a4xx_perfcounters,
+	.irq = &a4xx_irq,
+	.irq_trace = trace_kgsl_a4xx_irq_status,
+	.snapshot_data = &a4xx_snapshot_data,
+	.num_prio_levels = KGSL_PRIORITY_MAX_RB_LEVELS,
+	.vbif_xin_halt_ctrl0_mask = A4XX_VBIF_XIN_HALT_CTRL0_MASK,
+
+	.perfcounter_init = a4xx_perfcounter_init,
+	.perfcounter_close = a4xx_perfcounter_close,
+	.rb_start = a4xx_rb_start,
+	.init = a4xx_init,
+	.microcode_read = a3xx_microcode_read,
+	.coresight = &a4xx_coresight,
+	.start = a4xx_start,
+	.snapshot = a4xx_snapshot,
+	.is_sptp_idle = a4xx_is_sptp_idle,
+	.pwrlevel_change_settings = a4xx_pwrlevel_change_settings,
+	.regulator_enable = a4xx_regulator_enable,
+	.regulator_disable = a4xx_regulator_disable,
+	.preemption_pre_ibsubmit = a4xx_preemption_pre_ibsubmit,
+	.preemption_schedule = a4xx_preemption_schedule,
+	.preemption_init = a4xx_preemption_init,
+};
diff --git a/drivers/gpu/msm/adreno_a4xx.h b/drivers/gpu/msm/adreno_a4xx.h
new file mode 100644
index 0000000..5dabc26
--- /dev/null
+++ b/drivers/gpu/msm/adreno_a4xx.h
@@ -0,0 +1,62 @@
+/* Copyright (c) 2013-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _ADRENO_A4XX_H_
+#define _ADRENO_A4XX_H_
+
+#include "a4xx_reg.h"
+
+#define A4XX_IRQ_FLAGS \
+	{ BIT(A4XX_INT_RBBM_GPU_IDLE), "RBBM_GPU_IDLE" }, \
+	{ BIT(A4XX_INT_RBBM_REG_TIMEOUT), "RBBM_REG_TIMEOUT" }, \
+	{ BIT(A4XX_INT_RBBM_ME_MS_TIMEOUT), "RBBM_ME_MS_TIMEOUT" }, \
+	{ BIT(A4XX_INT_RBBM_PFP_MS_TIMEOUT), "RBBM_PFP_MS_TIMEOUT" }, \
+	{ BIT(A4XX_INT_RBBM_ETS_MS_TIMEOUT), "RBBM_ETS_MS_TIMEOUT" }, \
+	{ BIT(A4XX_INT_RBBM_ASYNC_OVERFLOW), "RBBM_ASYNC_OVERFLOW" }, \
+	{ BIT(A4XX_INT_RBBM_GPC_ERR), "RBBM_GPC_ERR" }, \
+	{ BIT(A4XX_INT_CP_SW), "CP_SW" }, \
+	{ BIT(A4XX_INT_CP_OPCODE_ERROR), "CP_OPCODE_ERROR" }, \
+	{ BIT(A4XX_INT_CP_RESERVED_BIT_ERROR), "CP_RESERVED_BIT_ERROR" }, \
+	{ BIT(A4XX_INT_CP_HW_FAULT), "CP_HW_FAULT" }, \
+	{ BIT(A4XX_INT_CP_DMA), "CP_DMA" }, \
+	{ BIT(A4XX_INT_CP_IB2_INT), "CP_IB2_INT" }, \
+	{ BIT(A4XX_INT_CP_IB1_INT), "CP_IB1_INT" }, \
+	{ BIT(A4XX_INT_CP_RB_INT), "CP_RB_INT" }, \
+	{ BIT(A4XX_INT_CP_REG_PROTECT_FAULT), "CP_REG_PROTECT_FAULT" }, \
+	{ BIT(A4XX_INT_CP_RB_DONE_TS), "CP_RB_DONE_TS" }, \
+	{ BIT(A4XX_INT_CP_VS_DONE_TS), "CP_VS_DONE_TS" }, \
+	{ BIT(A4XX_INT_CP_PS_DONE_TS), "CP_PS_DONE_TS" }, \
+	{ BIT(A4XX_INT_CACHE_FLUSH_TS), "CACHE_FLUSH_TS" }, \
+	{ BIT(A4XX_INT_CP_AHB_ERROR_HALT), "CP_AHB_ERROR_HALT" }, \
+	{ BIT(A4XX_INT_RBBM_ATB_BUS_OVERFLOW), "RBBM_ATB_BUS_OVERFLOW" }, \
+	{ BIT(A4XX_INT_MISC_HANG_DETECT), "MISC_HANG_DETECT" }, \
+	{ BIT(A4XX_INT_UCHE_OOB_ACCESS), "UCHE_OOB_ACCESS" }, \
+	{ BIT(A4XX_INT_RBBM_DPM_CALC_ERR), "RBBM_DPM_CALC_ERR" }, \
+	{ BIT(A4XX_INT_RBBM_DPM_EPOCH_ERR), "RBBM_DPM_EPOCH_ERR" }, \
+	{ BIT(A4XX_INT_RBBM_DPM_THERMAL_YELLOW_ERR), \
+		"RBBM_DPM_THERMAL_YELLOW_ERR" }, \
+	{ BIT(A4XX_INT_RBBM_DPM_THERMAL_RED_ERR), "RBBM_DPM_THERMAL_RED_ERR" }
+
+unsigned int a4xx_preemption_pre_ibsubmit(struct adreno_device *adreno_dev,
+			struct adreno_ringbuffer *rb,
+			unsigned int *cmds,
+			struct kgsl_context *context);
+
+void a4xx_preemption_schedule(struct adreno_device *adreno_dev);
+
+int a4xx_preemption_init(struct adreno_device *adreno_dev);
+
+void a4xx_snapshot(struct adreno_device *adreno_dev,
+		struct kgsl_snapshot *snapshot);
+
+#endif
diff --git a/drivers/gpu/msm/adreno_a4xx_preempt.c b/drivers/gpu/msm/adreno_a4xx_preempt.c
new file mode 100644
index 0000000..058ac9c
--- /dev/null
+++ b/drivers/gpu/msm/adreno_a4xx_preempt.c
@@ -0,0 +1,573 @@
+/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include "adreno.h"
+#include "adreno_a4xx.h"
+#include "adreno_trace.h"
+#include "adreno_pm4types.h"
+
+#define ADRENO_RB_PREEMPT_TOKEN_DWORDS		125
+
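+/*
+ * a4xx_preemption_timer() - Fires if a triggered preemption does not
+ * complete within ADRENO_PREEMPT_TIMEOUT; logs the current and next RB
+ * state, flags a preemption fault and schedules the dispatcher to recover.
+ */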
+static void a4xx_preemption_timer(unsigned long data)
+{
+	struct adreno_device *adreno_dev = (struct adreno_device *) data;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int cur_rptr = adreno_get_rptr(adreno_dev->cur_rb);
+	unsigned int next_rptr = adreno_get_rptr(adreno_dev->next_rb);
+
+	KGSL_DRV_ERR(device,
+		"Preemption timed out. cur_rb rptr/wptr %x/%x id %d, next_rb rptr/wptr %x/%x id %d, disp_state: %d\n",
+		cur_rptr, adreno_dev->cur_rb->wptr, adreno_dev->cur_rb->id,
+		next_rptr, adreno_dev->next_rb->wptr, adreno_dev->next_rb->id,
+		atomic_read(&adreno_dev->preempt.state));
+
+	adreno_set_gpu_fault(adreno_dev, ADRENO_PREEMPT_FAULT);
+	adreno_dispatcher_schedule(device);
+}
+
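+/*
+ * a4xx_preemption_token() - Emit a CP_PREEMPT_TOKEN packet that writes 1
+ * to the preemption flag at @gpuaddr and requests an interrupt when the
+ * CP processes the token. Returns the number of dwords written to @cmds.
+ */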
+static unsigned int a4xx_preemption_token(struct adreno_device *adreno_dev,
+			unsigned int *cmds, uint64_t gpuaddr)
+{
+	unsigned int *cmds_orig = cmds;
+
+	/* Turn on preemption flag */
+	/* preemption token - fill when pt switch command size is known */
+	*cmds++ = cp_type3_packet(CP_PREEMPT_TOKEN, 3);
+	*cmds++ = (uint)gpuaddr;
+	*cmds++ = 1;
+	/* generate interrupt on preemption completion */
+	*cmds++ = 1 << CP_PREEMPT_ORDINAL_INTERRUPT;
+
+	return (unsigned int) (cmds - cmds_orig);
+}
+
+unsigned int a4xx_preemption_pre_ibsubmit(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb, unsigned int *cmds,
+		struct kgsl_context *context)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int *cmds_orig = cmds;
+	unsigned int cond_addr = device->memstore.gpuaddr +
+		MEMSTORE_ID_GPU_ADDR(device, context->id, preempted);
+
+	cmds += a4xx_preemption_token(adreno_dev, cmds, cond_addr);
+
+	*cmds++ = cp_type3_packet(CP_COND_EXEC, 4);
+	*cmds++ = cond_addr;
+	*cmds++ = cond_addr;
+	*cmds++ = 1;
+	*cmds++ = 7;
+
+	/* clear preemption flag */
+	*cmds++ = cp_type3_packet(CP_MEM_WRITE, 2);
+	*cmds++ = cond_addr;
+	*cmds++ = 0;
+	*cmds++ = cp_type3_packet(CP_WAIT_MEM_WRITES, 1);
+	*cmds++ = 0;
+	*cmds++ = cp_type3_packet(CP_WAIT_FOR_ME, 1);
+	*cmds++ = 0;
+
+	return (unsigned int) (cmds - cmds_orig);
+}
+
+
+static void a4xx_preemption_start(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	uint32_t val;
+
+	/*
+	 * Set up scratch registers from which the GPU will program the
+	 * registers required to start execution of the new ringbuffer
+	 */
+	/* scratch REG8 corresponds to the ringbuffer base address */
+	kgsl_regwrite(device, A4XX_CP_SCRATCH_REG8,
+		rb->buffer_desc.gpuaddr);
+	kgsl_regread(device, A4XX_CP_RB_CNTL, &val);
+	/* scratch REG9 corresponds to CP_RB_CNTL register */
+	kgsl_regwrite(device, A4XX_CP_SCRATCH_REG9, val);
+	/* scratch REG10 corresponds to rptr address */
+	kgsl_regwrite(device, A4XX_CP_SCRATCH_REG10,
+		SCRATCH_RPTR_GPU_ADDR(device, rb->id));
+	/* scratch REG11 corresponds to rptr */
+	kgsl_regwrite(device, A4XX_CP_SCRATCH_REG11, adreno_get_rptr(rb));
+	/* scratch REG12 corresponds to wptr */
+	kgsl_regwrite(device, A4XX_CP_SCRATCH_REG12, rb->wptr);
+	/*
+	 * scratch REG13 corresponds to IB1_BASE,
+	 * 0 since we do not switch in between IBs
+	 */
+	kgsl_regwrite(device, A4XX_CP_SCRATCH_REG13, 0);
+	/* scratch REG14 corresponds to IB1_BUFSZ */
+	kgsl_regwrite(device, A4XX_CP_SCRATCH_REG14, 0);
+	/* scratch REG15 corresponds to IB2_BASE */
+	kgsl_regwrite(device, A4XX_CP_SCRATCH_REG15, 0);
+	/* scratch REG16 corresponds to IB2_BUFSZ */
+	kgsl_regwrite(device, A4XX_CP_SCRATCH_REG16, 0);
+	/* scratch REG17 corresponds to GPR11 */
+	kgsl_regwrite(device, A4XX_CP_SCRATCH_REG17, rb->gpr11);
+}
+
+static void a4xx_preemption_save(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	kgsl_regread(device, A4XX_CP_SCRATCH_REG23, &rb->gpr11);
+}
+
+
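+/*
+ * a4xx_submit_preempt_token() - Append commands to the outgoing RB that
+ * drop protected mode, switch to the incoming RB's pagetable if it was
+ * preempted midway, clear CP_PREEMPT_DISABLE and add a preempt token,
+ * then ring the wptr so the CP can act on the token.
+ */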
+static int a4xx_submit_preempt_token(struct adreno_ringbuffer *rb,
+					struct adreno_ringbuffer *incoming_rb)
+{
+	struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int *ringcmds, *start;
+	int ptname;
+	struct kgsl_pagetable *pt;
+	int pt_switch_sizedwords = 0, total_sizedwords = 20;
+	unsigned int link[ADRENO_RB_PREEMPT_TOKEN_DWORDS];
+	uint i;
+
+	if (incoming_rb->preempted_midway) {
+
+		kgsl_sharedmem_readl(&incoming_rb->pagetable_desc,
+			&ptname, PT_INFO_OFFSET(current_rb_ptname));
+		pt = kgsl_mmu_get_pt_from_ptname(&(device->mmu),
+			ptname);
+		if (IS_ERR_OR_NULL(pt))
+			return (pt == NULL) ? -ENOENT : PTR_ERR(pt);
+		/* set the ringbuffer for incoming RB */
+		pt_switch_sizedwords =
+			adreno_iommu_set_pt_generate_cmds(incoming_rb,
+							&link[0], pt);
+		total_sizedwords += pt_switch_sizedwords;
+	}
+
+	/*
+	 *  Allocate total_sizedwords space in RB, this is the max space
+	 *  required.
+	 */
+	ringcmds = adreno_ringbuffer_allocspace(rb, total_sizedwords);
+
+	if (IS_ERR(ringcmds))
+		return PTR_ERR(ringcmds);
+
+	start = ringcmds;
+
+	*ringcmds++ = cp_packet(adreno_dev, CP_SET_PROTECTED_MODE, 1);
+	*ringcmds++ = 0;
+
+	if (incoming_rb->preempted_midway) {
+		for (i = 0; i < pt_switch_sizedwords; i++)
+			*ringcmds++ = link[i];
+	}
+
+	*ringcmds++ = cp_register(adreno_dev, adreno_getreg(adreno_dev,
+			ADRENO_REG_CP_PREEMPT_DISABLE), 1);
+	*ringcmds++ = 0;
+
+	*ringcmds++ = cp_packet(adreno_dev, CP_SET_PROTECTED_MODE, 1);
+	*ringcmds++ = 1;
+
+	ringcmds += a4xx_preemption_token(adreno_dev, ringcmds,
+				device->memstore.gpuaddr +
+				MEMSTORE_RB_OFFSET(rb, preempted));
+
+	if ((uint)(ringcmds - start) > total_sizedwords)
+		KGSL_DRV_ERR(device, "Insufficient rb size allocated\n");
+
+	/*
+	 * If we wrote fewer dwords than the space reserved in the RB,
+	 * adjust the wptr accordingly
+	 */
+	rb->wptr = rb->wptr - (total_sizedwords - (uint)(ringcmds - start));
+
+	/* submit just the preempt token */
+	mb();
+	kgsl_pwrscale_busy(device);
+	adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_WPTR, rb->wptr);
+	return 0;
+}
+
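+/*
+ * a4xx_preempt_trig_state() - Handle the ADRENO_PREEMPT_TRIGGERED state:
+ * once the hardware is idle, check whether preemption already completed
+ * without an interrupt, flag a fault if it should have happened but did
+ * not, or submit a preempt token to push the switch along.
+ */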
+static void a4xx_preempt_trig_state(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int rbbase, val;
+	int ret;
+
+	/*
+	 * If the hardware is not yet idle the preemption interrupt may
+	 * still occur; there is nothing to do until the interrupt signals
+	 * completion of preemption, so just return
+	 */
+	if (!adreno_hw_isidle(adreno_dev))
+		return;
+
+	/*
+	 * The state changed underneath us; reschedule the dispatcher to
+	 * handle the new preemption state
+	 */
+
+	if (atomic_read(&adreno_dev->preempt.state) !=
+		ADRENO_PREEMPT_TRIGGERED) {
+		adreno_dispatcher_schedule(device);
+		return;
+	}
+
+	/*
+	 * The hardware is idle and we did not get a preemption interrupt;
+	 * either the device went idle without encountering a preempt token
+	 * or preemption already completed without an interrupt
+	 */
+	adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_BASE, &rbbase);
+	/* If preemption occurred, change states and return */
+	if (rbbase != adreno_dev->cur_rb->buffer_desc.gpuaddr) {
+		adreno_readreg(adreno_dev, ADRENO_REG_CP_PREEMPT_DEBUG, &val);
+		if (val && rbbase == adreno_dev->next_rb->buffer_desc.gpuaddr) {
+			KGSL_DRV_INFO(device,
+			"Preemption completed without interrupt\n");
+			trace_adreno_hw_preempt_trig_to_comp(adreno_dev->cur_rb,
+					adreno_dev->next_rb,
+					adreno_get_rptr(adreno_dev->cur_rb),
+					adreno_get_rptr(adreno_dev->next_rb));
+			adreno_set_preempt_state(adreno_dev,
+				ADRENO_PREEMPT_COMPLETE);
+			adreno_dispatcher_schedule(device);
+			return;
+		}
+		adreno_set_gpu_fault(adreno_dev, ADRENO_PREEMPT_FAULT);
+		/* reschedule dispatcher to take care of the fault */
+		adreno_dispatcher_schedule(device);
+		return;
+	}
+	/*
+	 * If a preempt token was submitted after the preemption trigger then
+	 * preemption should have occurred; since the device is already idle
+	 * something went wrong, so trigger fault tolerance
+	 */
+	if (adreno_dev->preempt.token_submit) {
+		adreno_set_gpu_fault(adreno_dev, ADRENO_PREEMPT_FAULT);
+		/* reschedule dispatcher to take care of the fault */
+		adreno_dispatcher_schedule(device);
+		return;
+	}
+	/*
+	 * No preempt token was submitted after the preemption trigger, so the
+	 * device may have gone idle before preemption could occur. If commands
+	 * were submitted to the current RB after the trigger, submit them now
+	 * since they may contain a preempt token
+	 */
+	if (!adreno_rb_empty(adreno_dev->cur_rb)) {
+		/*
+		 * Memory barrier before informing the
+		 * hardware of new commands
+		 */
+		mb();
+		kgsl_pwrscale_busy(device);
+		adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_WPTR,
+			adreno_dev->cur_rb->wptr);
+		return;
+	}
+
+	/* Submit preempt token to make preemption happen */
+	ret = adreno_drawctxt_switch(adreno_dev, adreno_dev->cur_rb,
+		NULL, 0);
+	if (ret)
+		KGSL_DRV_ERR(device,
+			"Unable to switch context to NULL: %d\n", ret);
+
+	ret = a4xx_submit_preempt_token(adreno_dev->cur_rb,
+						adreno_dev->next_rb);
+	if (ret)
+		KGSL_DRV_ERR(device,
+			"Unable to submit preempt token: %d\n", ret);
+
+	adreno_dev->preempt.token_submit = true;
+	adreno_dev->cur_rb->wptr_preempt_end = adreno_dev->cur_rb->wptr;
+	trace_adreno_hw_preempt_token_submit(adreno_dev->cur_rb,
+			adreno_dev->next_rb,
+			adreno_get_rptr(adreno_dev->cur_rb),
+			adreno_get_rptr(adreno_dev->next_rb));
+}
+
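+/*
+ * a4xx_next_ringbuffer() - Return the highest priority ringbuffer with
+ * pending commands. When fair scheduling is enabled this also advances
+ * the per-RB starvation timer state machine so a starved RB eventually
+ * gets to run.
+ */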
+static struct adreno_ringbuffer *a4xx_next_ringbuffer(
+		struct adreno_device *adreno_dev)
+{
+	struct adreno_ringbuffer *rb, *next = NULL;
+	int i;
+
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+		if (!adreno_rb_empty(rb) && next == NULL) {
+			next = rb;
+			continue;
+		}
+
+		if (!adreno_disp_preempt_fair_sched)
+			continue;
+
+		switch (rb->starve_timer_state) {
+		case ADRENO_DISPATCHER_RB_STARVE_TIMER_UNINIT:
+			if (!adreno_rb_empty(rb) &&
+				adreno_dev->cur_rb != rb) {
+				rb->starve_timer_state =
+				ADRENO_DISPATCHER_RB_STARVE_TIMER_INIT;
+				rb->sched_timer = jiffies;
+			}
+			break;
+		case ADRENO_DISPATCHER_RB_STARVE_TIMER_INIT:
+			if (time_after(jiffies, rb->sched_timer +
+				msecs_to_jiffies(
+					adreno_dispatch_starvation_time))) {
+				rb->starve_timer_state =
+				ADRENO_DISPATCHER_RB_STARVE_TIMER_ELAPSED;
+				/* halt dispatcher to remove starvation */
+				adreno_get_gpu_halt(adreno_dev);
+			}
+			break;
+		case ADRENO_DISPATCHER_RB_STARVE_TIMER_SCHEDULED:
+			/*
+			 * If the RB has not been running for the minimum
+			 * time slice then allow it to run
+			 */
+			if (!adreno_rb_empty(rb) && time_before(jiffies,
+				adreno_dev->cur_rb->sched_timer +
+				msecs_to_jiffies(adreno_dispatch_time_slice)))
+				next = rb;
+			else
+				rb->starve_timer_state =
+				ADRENO_DISPATCHER_RB_STARVE_TIMER_UNINIT;
+			break;
+		case ADRENO_DISPATCHER_RB_STARVE_TIMER_ELAPSED:
+		default:
+			break;
+		}
+	}
+
+	return next;
+}
+
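+/*
+ * a4xx_preempt_clear_state() - Handle the ADRENO_PREEMPT_NONE state: pick
+ * the next busy ringbuffer, program the CP scratch registers with its
+ * state, trigger preemption and arm the preemption timeout timer.
+ */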
+static void a4xx_preempt_clear_state(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_ringbuffer *highest_busy_rb;
+	int switch_low_to_high;
+	int ret;
+
+	/* Device not awake means there is nothing to do */
+	if (!kgsl_state_is_awake(device))
+		return;
+
+	highest_busy_rb = a4xx_next_ringbuffer(adreno_dev);
+	if (!highest_busy_rb || highest_busy_rb == adreno_dev->cur_rb)
+		return;
+
+	switch_low_to_high = adreno_compare_prio_level(
+					highest_busy_rb->id,
+					adreno_dev->cur_rb->id);
+
+	if (switch_low_to_high < 0) {
+		/*
+		 * When switching to a lower priority RB that is not starved,
+		 * make sure the current RB is empty (rptr and wptr are equal)
+		 */
+		if (!adreno_rb_empty(adreno_dev->cur_rb))
+			return;
+		/*
+		 * Switch to the default context because when we switch back
+		 * to the higher priority context it is not known which
+		 * pagetable will be current; by making it the default here,
+		 * the next commands submitted will set the right pagetable
+		 */
+		ret = adreno_drawctxt_switch(adreno_dev,
+				adreno_dev->cur_rb,
+				NULL, 0);
+		/*
+		 * lower priority RB has to wait until space opens up in
+		 * higher RB
+		 */
+		if (ret) {
+			KGSL_DRV_ERR(device,
+				"Unable to switch context to NULL: %d",
+				ret);
+
+			return;
+		}
+
+		adreno_writereg(adreno_dev,
+			ADRENO_REG_CP_PREEMPT_DISABLE, 1);
+	}
+
+	/*
+	 * Set up the registers to switch to the highest priority RB that is
+	 * not empty or that may be starving
+	 */
+	a4xx_preemption_start(adreno_dev, highest_busy_rb);
+
+	adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_TRIGGERED);
+
+	adreno_dev->next_rb = highest_busy_rb;
+	mod_timer(&adreno_dev->preempt.timer, jiffies +
+		msecs_to_jiffies(ADRENO_PREEMPT_TIMEOUT));
+
+	trace_adreno_hw_preempt_clear_to_trig(adreno_dev->cur_rb,
+			adreno_dev->next_rb,
+			adreno_get_rptr(adreno_dev->cur_rb),
+			adreno_get_rptr(adreno_dev->next_rb));
+	/* issue PREEMPT trigger */
+	adreno_writereg(adreno_dev, ADRENO_REG_CP_PREEMPT, 1);
+
+	/* submit preempt token packet to ensure preemption */
+	if (switch_low_to_high < 0) {
+		ret = a4xx_submit_preempt_token(
+			adreno_dev->cur_rb, adreno_dev->next_rb);
+		if (ret)
+			KGSL_DRV_ERR(device,
+				"Unable to submit preempt token: %d\n", ret);
+		adreno_dev->preempt.token_submit = true;
+		adreno_dev->cur_rb->wptr_preempt_end = adreno_dev->cur_rb->wptr;
+	} else {
+		adreno_dev->preempt.token_submit = false;
+		adreno_dispatcher_schedule(device);
+		adreno_dev->cur_rb->wptr_preempt_end = 0xFFFFFFFF;
+	}
+}
+
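+/*
+ * a4xx_preempt_complete_state() - Handle the ADRENO_PREEMPT_COMPLETE
+ * state: verify the CP switched to the expected ringbuffer, save the
+ * outgoing RB state, promote next_rb to cur_rb and update the wptr and
+ * starvation bookkeeping.
+ */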
+static void a4xx_preempt_complete_state(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int wptr, rbbase;
+	unsigned int val, val1;
+	unsigned int prevrptr;
+
+	del_timer_sync(&adreno_dev->preempt.timer);
+
+	adreno_readreg(adreno_dev, ADRENO_REG_CP_PREEMPT, &val);
+	adreno_readreg(adreno_dev, ADRENO_REG_CP_PREEMPT_DEBUG, &val1);
+
+	if (val || !val1) {
+		KGSL_DRV_ERR(device,
+		"Invalid state after preemption CP_PREEMPT: %08x, CP_PREEMPT_DEBUG: %08x\n",
+		val, val1);
+		adreno_set_gpu_fault(adreno_dev, ADRENO_PREEMPT_FAULT);
+		adreno_dispatcher_schedule(device);
+		return;
+	}
+	adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_BASE, &rbbase);
+	if (rbbase != adreno_dev->next_rb->buffer_desc.gpuaddr) {
+		KGSL_DRV_ERR(device,
+		"RBBASE incorrect after preemption, got %x expected %016llx\n",
+		rbbase,
+		adreno_dev->next_rb->buffer_desc.gpuaddr);
+		adreno_set_gpu_fault(adreno_dev, ADRENO_PREEMPT_FAULT);
+		adreno_dispatcher_schedule(device);
+		return;
+	}
+
+	a4xx_preemption_save(adreno_dev, adreno_dev->cur_rb);
+
+	/* new RB is the current RB */
+	trace_adreno_hw_preempt_comp_to_clear(adreno_dev->next_rb,
+			adreno_dev->cur_rb,
+			adreno_get_rptr(adreno_dev->next_rb),
+			adreno_get_rptr(adreno_dev->cur_rb));
+
+	adreno_dev->prev_rb = adreno_dev->cur_rb;
+	adreno_dev->cur_rb = adreno_dev->next_rb;
+	adreno_dev->cur_rb->preempted_midway = 0;
+	adreno_dev->cur_rb->wptr_preempt_end = 0xFFFFFFFF;
+	adreno_dev->next_rb = NULL;
+
+	if (adreno_disp_preempt_fair_sched) {
+		/* starved rb is now scheduled so unhalt dispatcher */
+		if (ADRENO_DISPATCHER_RB_STARVE_TIMER_ELAPSED ==
+			adreno_dev->cur_rb->starve_timer_state)
+			adreno_put_gpu_halt(adreno_dev);
+		adreno_dev->cur_rb->starve_timer_state =
+				ADRENO_DISPATCHER_RB_STARVE_TIMER_SCHEDULED;
+		adreno_dev->cur_rb->sched_timer = jiffies;
+		/*
+		 * If the outgoing RB has commands then set the
+		 * busy time for it
+		 */
+		if (!adreno_rb_empty(adreno_dev->prev_rb)) {
+			adreno_dev->prev_rb->starve_timer_state =
+				ADRENO_DISPATCHER_RB_STARVE_TIMER_INIT;
+			adreno_dev->prev_rb->sched_timer = jiffies;
+		} else {
+			adreno_dev->prev_rb->starve_timer_state =
+				ADRENO_DISPATCHER_RB_STARVE_TIMER_UNINIT;
+		}
+	}
+	adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
+
+	prevrptr = adreno_get_rptr(adreno_dev->prev_rb);
+
+	if (adreno_compare_prio_level(adreno_dev->prev_rb->id,
+				adreno_dev->cur_rb->id) < 0) {
+		if (adreno_dev->prev_rb->wptr_preempt_end != prevrptr)
+			adreno_dev->prev_rb->preempted_midway = 1;
+	}
+
+	/* submit wptr if required for new rb */
+	adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_WPTR, &wptr);
+	if (adreno_dev->cur_rb->wptr != wptr) {
+		kgsl_pwrscale_busy(device);
+		adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_WPTR,
+					adreno_dev->cur_rb->wptr);
+	}
+	/* clear preemption register */
+	adreno_writereg(adreno_dev, ADRENO_REG_CP_PREEMPT_DEBUG, 0);
+}
+
+void a4xx_preemption_schedule(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (!adreno_is_preemption_enabled(adreno_dev))
+		return;
+
+	mutex_lock(&device->mutex);
+
+	switch (atomic_read(&adreno_dev->preempt.state)) {
+	case ADRENO_PREEMPT_NONE:
+		a4xx_preempt_clear_state(adreno_dev);
+		break;
+	case ADRENO_PREEMPT_TRIGGERED:
+		a4xx_preempt_trig_state(adreno_dev);
+		/*
+		 * If we transitioned to the complete state then fall through
+		 * and process it
+		 */
+		if (!adreno_in_preempt_state(adreno_dev,
+			ADRENO_PREEMPT_COMPLETE))
+			break;
+	case ADRENO_PREEMPT_COMPLETE:
+		a4xx_preempt_complete_state(adreno_dev);
+		break;
+	default:
+		break;
+	}
+
+	mutex_unlock(&device->mutex);
+}
+
+int a4xx_preemption_init(struct adreno_device *adreno_dev)
+{
+	setup_timer(&adreno_dev->preempt.timer, a4xx_preemption_timer,
+		(unsigned long) adreno_dev);
+
+	return 0;
+}
diff --git a/drivers/gpu/msm/adreno_a4xx_snapshot.c b/drivers/gpu/msm/adreno_a4xx_snapshot.c
new file mode 100644
index 0000000..b6737d4
--- /dev/null
+++ b/drivers/gpu/msm/adreno_a4xx_snapshot.c
@@ -0,0 +1,600 @@
+/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/io.h>
+#include "kgsl.h"
+#include "adreno.h"
+#include "kgsl_snapshot.h"
+#include "a4xx_reg.h"
+#include "adreno_snapshot.h"
+#include "adreno_a4xx.h"
+
+/*
+ * Set of registers to dump for A4XX on snapshot.
+ * Registers in pairs - first value is the start offset, second
+ * is the stop offset (inclusive)
+ */
+
+static const unsigned int a4xx_registers[] = {
+	/* RBBM */
+	0x0000, 0x0002, 0x0004, 0x0021, 0x0023, 0x0024, 0x0026, 0x0026,
+	0x0028, 0x002B, 0x002E, 0x0034, 0x0037, 0x0044, 0x0047, 0x0066,
+	0x0068, 0x0095, 0x009C, 0x0170, 0x0174, 0x01AF,
+	/* CP */
+	0x0200, 0x0226, 0x0228, 0x0233, 0x0240, 0x0258, 0x04C0, 0x04D0,
+	0x04D2, 0x04DD, 0x0500, 0x050B, 0x0578, 0x058F,
+	/* VSC */
+	0x0C00, 0x0C03, 0x0C08, 0x0C41, 0x0C50, 0x0C51,
+	/* GRAS */
+	0x0C80, 0x0C81, 0x0C88, 0x0C8F,
+	/* RB */
+	0x0CC0, 0x0CC0, 0x0CC4, 0x0CD2,
+	/* PC */
+	0x0D00, 0x0D0C, 0x0D10, 0x0D17, 0x0D20, 0x0D23,
+	/* VFD */
+	0x0E40, 0x0E4A,
+	/* VPC */
+	0x0E60, 0x0E61, 0x0E63, 0x0E68,
+	/* UCHE */
+	0x0E80, 0x0E84, 0x0E88, 0x0E95,
+	/* GRAS CTX 0 */
+	0x2000, 0x2004, 0x2008, 0x2067, 0x2070, 0x2078, 0x207B, 0x216E,
+	/* PC CTX 0 */
+	0x21C0, 0x21C6, 0x21D0, 0x21D0, 0x21D9, 0x21D9, 0x21E5, 0x21E7,
+	/* VFD CTX 0 */
+	0x2200, 0x2204, 0x2208, 0x22A9,
+	/* GRAS CTX 1 */
+	0x2400, 0x2404, 0x2408, 0x2467, 0x2470, 0x2478, 0x247B, 0x256E,
+	/* PC CTX 1 */
+	0x25C0, 0x25C6, 0x25D0, 0x25D0, 0x25D9, 0x25D9, 0x25E5, 0x25E7,
+	/* VFD CTX 1 */
+	0x2600, 0x2604, 0x2608, 0x26A9,
+};
+
+static const unsigned int a4xx_sp_tp_registers[] = {
+	/* SP */
+	0x0EC0, 0x0ECF,
+	/* TPL1 */
+	0x0F00, 0x0F0B,
+	/* SP CTX 0 */
+	0x22C0, 0x22C1, 0x22C4, 0x22E5, 0x22E8, 0x22F8, 0x2300, 0x2306,
+	0x230C, 0x2312, 0x2318, 0x2339, 0x2340, 0x2360,
+	/* TPL1 CTX 0 */
+	0x2380, 0x2382, 0x2384, 0x238F, 0x23A0, 0x23A6,
+	/* SP CTX 1 */
+	0x26C0, 0x26C1, 0x26C4, 0x26E5, 0x26E8, 0x26F8, 0x2700, 0x2706,
+	0x270C, 0x2712, 0x2718, 0x2739, 0x2740, 0x2760,
+	/* TPL1 CTX 1 */
+	0x2780, 0x2782, 0x2784, 0x278F, 0x27A0, 0x27A6,
+};
+
+static const unsigned int a4xx_ppd_registers[] = {
+	/* V2 Thresholds */
+	0x01B2, 0x01B5,
+	/* Control and Status */
+	0x01B9, 0x01BE,
+};
+
+static const unsigned int a4xx_xpu_registers[] = {
+	/* XPU */
+	0x2C00, 0x2C01, 0x2C10, 0x2C10, 0x2C12, 0x2C16, 0x2C1D, 0x2C20,
+	0x2C28, 0x2C28, 0x2C30, 0x2C30, 0x2C32, 0x2C36, 0x2C40, 0x2C40,
+	0x2C50, 0x2C50, 0x2C52, 0x2C56, 0x2C80, 0x2C80, 0x2C94, 0x2C95,
+};
+
+static const unsigned int a4xx_vbif_ver_20000000_registers[] = {
+	/* VBIF version 0x20000000 & IOMMU V1 */
+	0x3000, 0x3007, 0x300C, 0x3014, 0x3018, 0x301D, 0x3020, 0x3022,
+	0x3024, 0x3026, 0x3028, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031,
+	0x3034, 0x3036, 0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040,
+	0x3049, 0x3049, 0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068,
+	0x306C, 0x306D, 0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094,
+	0x3098, 0x3098, 0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8,
+	0x30D0, 0x30D0, 0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100,
+	0x3108, 0x3108, 0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120,
+	0x3124, 0x3125, 0x3129, 0x3129, 0x3131, 0x3131, 0x330C, 0x330C,
+	0x3310, 0x3310, 0x3400, 0x3401, 0x3410, 0x3410, 0x3412, 0x3416,
+	0x341D, 0x3420, 0x3428, 0x3428, 0x3430, 0x3430, 0x3432, 0x3436,
+	0x3440, 0x3440, 0x3450, 0x3450, 0x3452, 0x3456, 0x3480, 0x3480,
+	0x3494, 0x3495, 0x4000, 0x4000, 0x4002, 0x4002, 0x4004, 0x4004,
+	0x4008, 0x400A, 0x400C, 0x400D, 0x400F, 0x4012, 0x4014, 0x4016,
+	0x401D, 0x401D, 0x4020, 0x4027, 0x4060, 0x4062, 0x4200, 0x4200,
+	0x4300, 0x4300, 0x4400, 0x4400, 0x4500, 0x4500, 0x4800, 0x4802,
+	0x480F, 0x480F, 0x4811, 0x4811, 0x4813, 0x4813, 0x4815, 0x4816,
+	0x482B, 0x482B, 0x4857, 0x4857, 0x4883, 0x4883, 0x48AF, 0x48AF,
+	0x48C5, 0x48C5, 0x48E5, 0x48E5, 0x4905, 0x4905, 0x4925, 0x4925,
+	0x4945, 0x4945, 0x4950, 0x4950, 0x495B, 0x495B, 0x4980, 0x498E,
+	0x4B00, 0x4B00, 0x4C00, 0x4C00, 0x4D00, 0x4D00, 0x4E00, 0x4E00,
+	0x4E80, 0x4E80, 0x4F00, 0x4F00, 0x4F08, 0x4F08, 0x4F10, 0x4F10,
+	0x4F18, 0x4F18, 0x4F20, 0x4F20, 0x4F30, 0x4F30, 0x4F60, 0x4F60,
+	0x4F80, 0x4F81, 0x4F88, 0x4F89, 0x4FEE, 0x4FEE, 0x4FF3, 0x4FF3,
+	0x6000, 0x6001, 0x6008, 0x600F, 0x6014, 0x6016, 0x6018, 0x601B,
+	0x61FD, 0x61FD, 0x623C, 0x623C, 0x6380, 0x6380, 0x63A0, 0x63A0,
+	0x63C0, 0x63C1, 0x63C8, 0x63C9, 0x63D0, 0x63D4, 0x63D6, 0x63D6,
+	0x63EE, 0x63EE, 0x6400, 0x6401, 0x6408, 0x640F, 0x6414, 0x6416,
+	0x6418, 0x641B, 0x65FD, 0x65FD, 0x663C, 0x663C, 0x6780, 0x6780,
+	0x67A0, 0x67A0, 0x67C0, 0x67C1, 0x67C8, 0x67C9, 0x67D0, 0x67D4,
+	0x67D6, 0x67D6, 0x67EE, 0x67EE,
+};
+
+static const unsigned int a4xx_vbif_ver_20020000_registers[] = {
+	0x3000, 0x3007, 0x300C, 0x3014, 0x3018, 0x301D, 0x3020, 0x3022,
+	0x3024, 0x3026, 0x3028, 0x302A, 0x302C, 0x302D, 0x3030, 0x3031,
+	0x3034, 0x3036, 0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040,
+	0x3049, 0x3049, 0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068,
+	0x306C, 0x306D, 0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094,
+	0x3098, 0x3098, 0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8,
+	0x30D0, 0x30D0, 0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100,
+	0x3108, 0x3108, 0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120,
+	0x3124, 0x3125, 0x3129, 0x3129, 0x3131, 0x3131, 0x4800, 0x4802,
+	0x480F, 0x480F, 0x4811, 0x4811, 0x4813, 0x4813, 0x4815, 0x4816,
+	0x482B, 0x482B, 0x4857, 0x4857, 0x4883, 0x4883, 0x48AF, 0x48AF,
+	0x48C5, 0x48C5, 0x48E5, 0x48E5, 0x4905, 0x4905, 0x4925, 0x4925,
+	0x4945, 0x4945, 0x4950, 0x4950, 0x495B, 0x495B, 0x4980, 0x498E,
+	0x4C00, 0x4C00, 0x4D00, 0x4D00, 0x4E00, 0x4E00, 0x4E80, 0x4E80,
+	0x4F00, 0x4F00, 0x4F08, 0x4F08, 0x4F10, 0x4F10, 0x4F18, 0x4F18,
+	0x4F20, 0x4F20, 0x4F30, 0x4F30, 0x4F60, 0x4F60, 0x4F80, 0x4F81,
+	0x4F88, 0x4F89, 0x4FEE, 0x4FEE, 0x4FF3, 0x4FF3, 0x6000, 0x6001,
+	0x6008, 0x600F, 0x6014, 0x6016, 0x6018, 0x601B, 0x61FD, 0x61FD,
+	0x623C, 0x623C, 0x6380, 0x6380, 0x63A0, 0x63A0, 0x63C0, 0x63C1,
+	0x63C8, 0x63C9, 0x63D0, 0x63D4, 0x63D6, 0x63D6, 0x63EE, 0x63EE,
+	0x6400, 0x6401, 0x6408, 0x640F, 0x6414, 0x6416, 0x6418, 0x641B,
+	0x65FD, 0x65FD, 0x663C, 0x663C, 0x6780, 0x6780, 0x67A0, 0x67A0,
+	0x67C0, 0x67C1, 0x67C8, 0x67C9, 0x67D0, 0x67D4, 0x67D6, 0x67D6,
+	0x67EE, 0x67EE,
+};
+
+static const unsigned int a4xx_vbif_ver_20050000_registers[] = {
+	/* VBIF version 0x20050000 and 0x20090000 */
+	0x3000, 0x3007, 0x302C, 0x302C, 0x3030, 0x3030, 0x3034, 0x3036,
+	0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040, 0x3049, 0x3049,
+	0x3058, 0x3058, 0x305B, 0x3061, 0x3064, 0x3068, 0x306C, 0x306D,
+	0x3080, 0x3088, 0x308B, 0x308C, 0x3090, 0x3094, 0x3098, 0x3098,
+	0x309C, 0x309C, 0x30C0, 0x30C0, 0x30C8, 0x30C8, 0x30D0, 0x30D0,
+	0x30D8, 0x30D8, 0x30E0, 0x30E0, 0x3100, 0x3100, 0x3108, 0x3108,
+	0x3110, 0x3110, 0x3118, 0x3118, 0x3120, 0x3120, 0x3124, 0x3125,
+	0x3129, 0x3129, 0x340C, 0x340C, 0x3410, 0x3410,
+};
+
+static const struct adreno_vbif_snapshot_registers
+					a4xx_vbif_snapshot_registers[] = {
+	{ 0x20000000, 0xFFFF0000, a4xx_vbif_ver_20000000_registers,
+				ARRAY_SIZE(a4xx_vbif_ver_20000000_registers)/2},
+	{ 0x20020000, 0xFFFF0000, a4xx_vbif_ver_20020000_registers,
+				ARRAY_SIZE(a4xx_vbif_ver_20020000_registers)/2},
+	{ 0x20050000, 0xFFFF0000, a4xx_vbif_ver_20050000_registers,
+				ARRAY_SIZE(a4xx_vbif_ver_20050000_registers)/2},
+	{ 0x20070000, 0xFFFF0000, a4xx_vbif_ver_20020000_registers,
+				ARRAY_SIZE(a4xx_vbif_ver_20020000_registers)/2},
+	{ 0x20090000, 0xFFFF0000, a4xx_vbif_ver_20050000_registers,
+				ARRAY_SIZE(a4xx_vbif_ver_20050000_registers)/2},
+};
+
+#define A4XX_NUM_SHADER_BANKS 4
+#define A405_NUM_SHADER_BANKS 1
+/* Shader memory size in words */
+#define A4XX_SHADER_MEMORY_SIZE 0x4000
+
+static const struct adreno_debugbus_block a4xx_debugbus_blocks[] = {
+	{ A4XX_RBBM_DEBBUS_CP_ID, 0x100, },
+	{ A4XX_RBBM_DEBBUS_RBBM_ID, 0x100, },
+	{ A4XX_RBBM_DEBBUS_VBIF_ID, 0x100, },
+	{ A4XX_RBBM_DEBBUS_HLSQ_ID, 0x100, },
+	{ A4XX_RBBM_DEBBUS_UCHE_ID, 0x100, },
+	{ A4XX_RBBM_DEBBUS_DPM_ID, 0x100, },
+	{ A4XX_RBBM_DEBBUS_TESS_ID, 0x100, },
+	{ A4XX_RBBM_DEBBUS_PC_ID, 0x100, },
+	{ A4XX_RBBM_DEBBUS_VFD_ID, 0x100, },
+	{ A4XX_RBBM_DEBBUS_VPC_ID, 0x100, },
+	{ A4XX_RBBM_DEBBUS_TSE_ID, 0x100, },
+	{ A4XX_RBBM_DEBBUS_RAS_ID, 0x100, },
+	{ A4XX_RBBM_DEBBUS_VSC_ID, 0x100, },
+	{ A4XX_RBBM_DEBBUS_COM_ID, 0x100, },
+	{ A4XX_RBBM_DEBBUS_DCOM_ID, 0x100, },
+	{ A4XX_RBBM_DEBBUS_SP_0_ID, 0x100, },
+	{ A4XX_RBBM_DEBBUS_TPL1_0_ID, 0x100, },
+	{ A4XX_RBBM_DEBBUS_RB_0_ID, 0x100, },
+	{ A4XX_RBBM_DEBBUS_MARB_0_ID, 0x100 },
+};
+
+static const struct adreno_debugbus_block a420_debugbus_blocks[] = {
+	{ A4XX_RBBM_DEBBUS_SP_1_ID, 0x100, },
+	{ A4XX_RBBM_DEBBUS_SP_2_ID, 0x100, },
+	{ A4XX_RBBM_DEBBUS_SP_3_ID, 0x100, },
+	{ A4XX_RBBM_DEBBUS_TPL1_1_ID, 0x100, },
+	{ A4XX_RBBM_DEBBUS_TPL1_2_ID, 0x100, },
+	{ A4XX_RBBM_DEBBUS_TPL1_3_ID, 0x100, },
+	{ A4XX_RBBM_DEBBUS_RB_1_ID, 0x100, },
+	{ A4XX_RBBM_DEBBUS_RB_2_ID, 0x100, },
+	{ A4XX_RBBM_DEBBUS_RB_3_ID, 0x100, },
+	{ A4XX_RBBM_DEBBUS_MARB_1_ID, 0x100, },
+	{ A4XX_RBBM_DEBBUS_MARB_2_ID, 0x100, },
+	{ A4XX_RBBM_DEBBUS_MARB_3_ID, 0x100, },
+	{ A4XX_RBBM_DEBBUS_CCU_0_ID, 0x100, },
+	{ A4XX_RBBM_DEBBUS_CCU_1_ID, 0x100, },
+	{ A4XX_RBBM_DEBBUS_CCU_2_ID, 0x100, },
+	{ A4XX_RBBM_DEBBUS_CCU_3_ID, 0x100, },
+};
+
+/**
+ * a4xx_snapshot_shader_memory - Helper function to dump the GPU shader
+ * memory to the snapshot buffer.
+ * @device: GPU device whose shader memory is to be dumped
+ * @buf: Pointer to binary snapshot data blob being made
+ * @remain: Number of remaining bytes in the snapshot blob
+ * @priv: Unused parameter
+ */
+static size_t a4xx_snapshot_shader_memory(struct kgsl_device *device,
+	u8 *buf, size_t remain, void *priv)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf;
+	unsigned int i, j;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	unsigned int shader_read_len = A4XX_SHADER_MEMORY_SIZE;
+	unsigned int shader_banks = A4XX_NUM_SHADER_BANKS;
+
+	if (shader_read_len > (device->shader_mem_len >> 2))
+		shader_read_len = (device->shader_mem_len >> 2);
+
+	if (adreno_is_a405(adreno_dev))
+		shader_banks = A405_NUM_SHADER_BANKS;
+
+	if (remain < DEBUG_SECTION_SZ(shader_read_len *
+				shader_banks)) {
+		SNAPSHOT_ERR_NOMEM(device, "SHADER MEMORY");
+		return 0;
+	}
+
+	header->type = SNAPSHOT_DEBUG_SHADER_MEMORY;
+	header->size = shader_read_len * shader_banks;
+
+	/* Map shader memory to kernel, for dumping */
+	if (device->shader_mem_virt == NULL)
+		device->shader_mem_virt = devm_ioremap(device->dev,
+					device->shader_mem_phys,
+					device->shader_mem_len);
+
+	if (device->shader_mem_virt == NULL) {
+		KGSL_DRV_ERR(device,
+		"Unable to map shader memory region\n");
+		return 0;
+	}
+
+	for (j = 0; j < shader_banks; j++) {
+		unsigned int val;
+		/* select the SPTP */
+		kgsl_regread(device, A4XX_HLSQ_SPTP_RDSEL, &val);
+		val &= ~0x3;
+		val |= j;
+		kgsl_regwrite(device, A4XX_HLSQ_SPTP_RDSEL, val);
+		/* Now, dump shader memory to snapshot */
+		for (i = 0; i < shader_read_len; i++)
+			adreno_shadermem_regread(device, i,
+				&data[i + j * shader_read_len]);
+	}
+
+
+	return DEBUG_SECTION_SZ(shader_read_len * shader_banks);
+}
+
+/*
+ * a4xx_rbbm_debug_bus_read() - Read data from trace bus
+ * @device: Device whose data bus is read
+ * @block_id: Trace bus block ID
+ * @index: Index of data to read
+ * @val: Output parameter where data is read
+ */
+static void a4xx_rbbm_debug_bus_read(struct kgsl_device *device,
+	unsigned int block_id, unsigned int index, unsigned int *val)
+{
+	unsigned int reg = 0;
+
+	reg |= (block_id << A4XX_RBBM_CFG_DEBBUS_SEL_PING_BLK_SEL_SHIFT);
+	reg |= (index << A4XX_RBBM_CFG_DEBBUS_SEL_PING_INDEX_SHIFT);
+	kgsl_regwrite(device, A4XX_RBBM_CFG_DEBBUS_SEL_A, reg);
+	kgsl_regwrite(device, A4XX_RBBM_CFG_DEBBUS_SEL_B, reg);
+	kgsl_regwrite(device, A4XX_RBBM_CFG_DEBBUS_SEL_C, reg);
+	kgsl_regwrite(device, A4XX_RBBM_CFG_DEBBUS_SEL_D, reg);
+
+	kgsl_regwrite(device, A4XX_RBBM_CFG_DEBBUS_IDX, 0x3020000);
+	kgsl_regread(device, A4XX_RBBM_CFG_DEBBUS_TRACE_BUF4, val);
+	val++;
+	kgsl_regwrite(device, A4XX_RBBM_CFG_DEBBUS_IDX, 0x1000000);
+	kgsl_regread(device, A4XX_RBBM_CFG_DEBBUS_TRACE_BUF4, val);
+}
+
+/*
+ * a4xx_snapshot_vbif_debugbus() - Dump the VBIF debug data
+ * @device: Device pointer for which the debug data is dumped
+ * @buf: Pointer to the memory where the data is dumped
+ * @remain: Amount of bytes remaining in snapshot
+ * @priv: Pointer to debug bus block
+ *
+ * Returns the number of bytes dumped
+ */
+static size_t a4xx_snapshot_vbif_debugbus(struct kgsl_device *device,
+			u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_debugbus *header =
+		(struct kgsl_snapshot_debugbus *)buf;
+	struct adreno_debugbus_block *block = priv;
+	int i, j;
+	/*
+	 * Total number of VBIF data words considering 3 sections:
+	 * 2 arbiter blocks of 16 words
+	 * 5 AXI XIN blocks of 4 dwords each
+	 * 5 core clock side XIN blocks of 5 dwords each
+	 */
+	unsigned int dwords = (16 * A4XX_NUM_AXI_ARB_BLOCKS) +
+			(4 * A4XX_NUM_XIN_BLOCKS) + (5 * A4XX_NUM_XIN_BLOCKS);
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	size_t size;
+	unsigned int reg_clk;
+
+	size = (dwords * sizeof(unsigned int)) + sizeof(*header);
+
+	if (remain < size) {
+		SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS");
+		return 0;
+	}
+	header->id = block->block_id;
+	header->count = dwords;
+
+	kgsl_regread(device, A4XX_VBIF_CLKON, &reg_clk);
+	kgsl_regwrite(device, A4XX_VBIF_CLKON, reg_clk |
+			(A4XX_VBIF_CLKON_FORCE_ON_TESTBUS_MASK <<
+			A4XX_VBIF_CLKON_FORCE_ON_TESTBUS_SHIFT));
+	kgsl_regwrite(device, A4XX_VBIF_TEST_BUS1_CTRL0, 0);
+	kgsl_regwrite(device, A4XX_VBIF_TEST_BUS_OUT_CTRL,
+			(A4XX_VBIF_TEST_BUS_OUT_CTRL_EN_MASK <<
+			A4XX_VBIF_TEST_BUS_OUT_CTRL_EN_SHIFT));
+	for (i = 0; i < A4XX_NUM_AXI_ARB_BLOCKS; i++) {
+		kgsl_regwrite(device, A4XX_VBIF_TEST_BUS2_CTRL0,
+			(1 << (i + 16)));
+		for (j = 0; j < 16; j++) {
+			kgsl_regwrite(device, A4XX_VBIF_TEST_BUS2_CTRL1,
+				((j & A4XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_MASK)
+				<< A4XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_SHIFT));
+			kgsl_regread(device, A4XX_VBIF_TEST_BUS_OUT,
+					data);
+			data++;
+		}
+	}
+
+	/* XIN blocks AXI side */
+	for (i = 0; i < A4XX_NUM_XIN_BLOCKS; i++) {
+		kgsl_regwrite(device, A4XX_VBIF_TEST_BUS2_CTRL0, 1 << i);
+		for (j = 0; j < 4; j++) {
+			kgsl_regwrite(device, A4XX_VBIF_TEST_BUS2_CTRL1,
+				((j & A4XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_MASK)
+				<< A4XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_SHIFT));
+			kgsl_regread(device, A4XX_VBIF_TEST_BUS_OUT,
+				data);
+			data++;
+		}
+	}
+
+	/* XIN blocks core clock side */
+	for (i = 0; i < A4XX_NUM_XIN_BLOCKS; i++) {
+		kgsl_regwrite(device, A4XX_VBIF_TEST_BUS1_CTRL0, 1 << i);
+		for (j = 0; j < 5; j++) {
+			kgsl_regwrite(device, A4XX_VBIF_TEST_BUS1_CTRL1,
+				((j & A4XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_MASK)
+				<< A4XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_SHIFT));
+			kgsl_regread(device, A4XX_VBIF_TEST_BUS_OUT,
+				data);
+			data++;
+		}
+	}
+	/* restore the clock of VBIF */
+	kgsl_regwrite(device, A4XX_VBIF_CLKON, reg_clk);
+	return size;
+}
+
+/*
+ * a4xx_snapshot_debugbus_block() - Capture debug data for a gpu block
+ * @device: Pointer to device
+ * @buf: Memory where data is captured
+ * @remain: Number of bytes left in snapshot
+ * @priv: Pointer to debug bus block
+ *
+ * Returns the number of bytes written
+ */
+static size_t a4xx_snapshot_debugbus_block(struct kgsl_device *device,
+	u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_debugbus *header =
+		(struct kgsl_snapshot_debugbus *)buf;
+	struct adreno_debugbus_block *block = priv;
+	int i;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	unsigned int dwords;
+	size_t size;
+
+	dwords = block->dwords;
+
+	/* For a4xx each debug bus data unit is 2 DWORDS */
+	size = (dwords * sizeof(unsigned int) * 2) + sizeof(*header);
+
+	if (remain < size) {
+		SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS");
+		return 0;
+	}
+
+	header->id = block->block_id;
+	header->count = dwords * 2;
+
+	for (i = 0; i < dwords; i++)
+		a4xx_rbbm_debug_bus_read(device, block->block_id, i,
+					&data[i*2]);
+
+	return size;
+}
+
+/*
+ * a4xx_snapshot_debugbus() - Capture debug bus data
+ * @device: The device for which data is captured
+ * @snapshot: Pointer to the snapshot instance
+ */
+static void a4xx_snapshot_debugbus(struct kgsl_device *device,
+		struct kgsl_snapshot *snapshot)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	int i;
+
+	kgsl_regwrite(device, A4XX_RBBM_CFG_DEBBUS_CTLM,
+		0xf << A4XX_RBBM_CFG_DEBBUS_CTLT_ENABLE_SHIFT);
+
+	for (i = 0; i < ARRAY_SIZE(a4xx_debugbus_blocks); i++) {
+		if (A4XX_RBBM_DEBBUS_VBIF_ID ==
+			a4xx_debugbus_blocks[i].block_id)
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_DEBUGBUS,
+				snapshot, a4xx_snapshot_vbif_debugbus,
+				(void *) &a4xx_debugbus_blocks[i]);
+		else
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_DEBUGBUS,
+				snapshot, a4xx_snapshot_debugbus_block,
+				(void *) &a4xx_debugbus_blocks[i]);
+	}
+
+	if (!adreno_is_a405(adreno_dev)) {
+		for (i = 0; i < ARRAY_SIZE(a420_debugbus_blocks); i++)
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_DEBUGBUS,
+				snapshot, a4xx_snapshot_debugbus_block,
+				(void *) &a420_debugbus_blocks[i]);
+
+	}
+}
+
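+/*
+ * a4xx_reset_hlsq() - Soft reset the CP and HLSQ blocks through
+ * RBBM_BLOCK_SW_RESET_CMD so the SP/TP registers can be dumped safely,
+ * then reprogram the HLSQ timeout threshold.
+ */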
+static void a4xx_reset_hlsq(struct kgsl_device *device)
+{
+	unsigned int val, dummy = 0;
+
+	/* reset cp */
+	kgsl_regwrite(device, A4XX_RBBM_BLOCK_SW_RESET_CMD, 1 << 20);
+	kgsl_regread(device, A4XX_RBBM_BLOCK_SW_RESET_CMD, &dummy);
+
+	/* reset hlsq */
+	kgsl_regwrite(device, A4XX_RBBM_BLOCK_SW_RESET_CMD, 1 << 25);
+	kgsl_regread(device, A4XX_RBBM_BLOCK_SW_RESET_CMD, &dummy);
+
+	/* clear reset bits */
+	kgsl_regwrite(device, A4XX_RBBM_BLOCK_SW_RESET_CMD, 0);
+	kgsl_regread(device, A4XX_RBBM_BLOCK_SW_RESET_CMD, &dummy);
+
+
+	/* set HLSQ_TIMEOUT_THRESHOLD.cycle_timeout_limit_sp to 26 */
+	kgsl_regread(device, A4XX_HLSQ_TIMEOUT_THRESHOLD, &val);
+	val &= ~(0x1F << 24);
+	val |= (26 << 24);
+	kgsl_regwrite(device, A4XX_HLSQ_TIMEOUT_THRESHOLD, val);
+}
+
+/*
+ * a4xx_snapshot() - A4XX GPU snapshot function
+ * @adreno_dev: Device being snapshotted
+ * @snapshot: Pointer to the snapshot instance
+ *
+ * This is where all of the A4XX specific bits and pieces are grabbed
+ * into the snapshot memory
+ */
+void a4xx_snapshot(struct adreno_device *adreno_dev,
+		struct kgsl_snapshot *snapshot)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	struct adreno_snapshot_data *snap_data = gpudev->snapshot_data;
+
+	/* Disable SP clock gating for the debug bus to work */
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_SP0, 0);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_SP1, 0);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_SP2, 0);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL_SP3, 0);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_SP0, 0);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_SP1, 0);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_SP2, 0);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2_SP3, 0);
+
+	/* Disable top level clock gating for the debug bus to work */
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL, 0);
+	kgsl_regwrite(device, A4XX_RBBM_CLOCK_CTL2, 0);
+
+	/* Master set of (non debug) registers */
+
+	SNAPSHOT_REGISTERS(device, snapshot, a4xx_registers);
+
+	if (adreno_is_a430(adreno_dev))
+		SNAPSHOT_REGISTERS(device, snapshot, a4xx_sp_tp_registers);
+
+	if (adreno_is_a420(adreno_dev))
+		SNAPSHOT_REGISTERS(device, snapshot, a4xx_xpu_registers);
+
+	if (adreno_is_a430v2(adreno_dev))
+		SNAPSHOT_REGISTERS(device, snapshot, a4xx_ppd_registers);
+
+	adreno_snapshot_vbif_registers(device, snapshot,
+		a4xx_vbif_snapshot_registers,
+		ARRAY_SIZE(a4xx_vbif_snapshot_registers));
+
+	kgsl_snapshot_indexed_registers(device, snapshot,
+		A4XX_CP_STATE_DEBUG_INDEX, A4XX_CP_STATE_DEBUG_DATA,
+		0, snap_data->sect_sizes->cp_pfp);
+
+	/* CP_ME indexed registers */
+	kgsl_snapshot_indexed_registers(device, snapshot,
+		A4XX_CP_ME_CNTL, A4XX_CP_ME_STATUS, 64, 44);
+
+	/* VPC memory */
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
+		snapshot, adreno_snapshot_vpc_memory,
+		&snap_data->sect_sizes->vpc_mem);
+
+	/* CP MEQ */
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
+		snapshot, adreno_snapshot_cp_meq,
+		&snap_data->sect_sizes->cp_meq);
+
+	/* CP PFP and PM4 */
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
+		snapshot, adreno_snapshot_cp_pfp_ram, NULL);
+
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
+		snapshot, adreno_snapshot_cp_pm4_ram, NULL);
+
+	/* CP ROQ */
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
+		snapshot, adreno_snapshot_cp_roq,
+		&snap_data->sect_sizes->roq);
+
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
+		snapshot, adreno_snapshot_cp_merciu,
+		&snap_data->sect_sizes->cp_merciu);
+
+	/* Debug bus */
+	a4xx_snapshot_debugbus(device, snapshot);
+
+	if (!adreno_is_a430(adreno_dev)) {
+		a4xx_reset_hlsq(device);
+		SNAPSHOT_REGISTERS(device, snapshot, a4xx_sp_tp_registers);
+	}
+
+	/* Shader working/shadow memory */
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
+		snapshot, a4xx_snapshot_shader_memory,
+		&snap_data->sect_sizes->shader_mem);
+}
diff --git a/drivers/gpu/msm/adreno_a5xx.c b/drivers/gpu/msm/adreno_a5xx.c
new file mode 100644
index 0000000..4ffce21
--- /dev/null
+++ b/drivers/gpu/msm/adreno_a5xx.c
@@ -0,0 +1,3592 @@
+/* Copyright (c) 2014-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/firmware.h>
+#include <soc/qcom/subsystem_restart.h>
+#include <soc/qcom/scm.h>
+#include <linux/pm_opp.h>
+#include <linux/clk/qcom.h>
+
+#include "adreno.h"
+#include "a5xx_reg.h"
+#include "adreno_a5xx.h"
+#include "adreno_cp_parser.h"
+#include "adreno_trace.h"
+#include "adreno_pm4types.h"
+#include "adreno_perfcounter.h"
+#include "adreno_ringbuffer.h"
+#include "kgsl_sharedmem.h"
+#include "kgsl_log.h"
+#include "kgsl.h"
+#include "kgsl_trace.h"
+#include "adreno_a5xx_packets.h"
+
+static int zap_ucode_loaded;
+static int critical_packet_constructed;
+
+static struct kgsl_memdesc crit_pkts;
+static unsigned int crit_pkts_dwords;
+static struct kgsl_memdesc crit_pkts_refbuf0;
+static struct kgsl_memdesc crit_pkts_refbuf1;
+static struct kgsl_memdesc crit_pkts_refbuf2;
+static struct kgsl_memdesc crit_pkts_refbuf3;
+
+static const struct adreno_vbif_data a530_vbif[] = {
+	{A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003},
+	{0, 0},
+};
+
+static const struct adreno_vbif_data a540_vbif[] = {
+	{A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003},
+	{A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009},
+	{0, 0},
+};
+
+static const struct adreno_vbif_platform a5xx_vbif_platforms[] = {
+	{ adreno_is_a540, a540_vbif },
+	{ adreno_is_a530, a530_vbif },
+	{ adreno_is_a512, a540_vbif },
+	{ adreno_is_a510, a530_vbif },
+	{ adreno_is_a505, a530_vbif },
+	{ adreno_is_a506, a530_vbif },
+};
+
+static void a5xx_irq_storm_worker(struct work_struct *work);
+static int _read_fw2_block_header(uint32_t *header, uint32_t id,
+	uint32_t major, uint32_t minor);
+static void a5xx_gpmu_reset(struct work_struct *work);
+static int a5xx_gpmu_init(struct adreno_device *adreno_dev);
+
+/**
+ * Number of times to check if the regulator is enabled before
+ * giving up and returning failure.
+ */
+#define PWR_RETRY 100
+
+/**
+ * Number of times to check if the GPMU firmware is initialized before
+ * giving up and returning failure.
+ */
+#define GPMU_FW_INIT_RETRY 5000
+
+#define A530_QFPROM_RAW_PTE_ROW0_MSB 0x134
+#define A530_QFPROM_RAW_PTE_ROW2_MSB 0x144
+
+/* Print some key registers if a spin-for-idle times out */
+static void spin_idle_debug(struct kgsl_device *device,
+		const char *str)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	unsigned int rptr, wptr;
+	unsigned int status, status3, intstatus;
+	unsigned int hwfault;
+
+	dev_err(device->dev, "%s", str);
+
+	adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_RPTR, &rptr);
+	adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_WPTR, &wptr);
+
+	kgsl_regread(device, A5XX_RBBM_STATUS, &status);
+	kgsl_regread(device, A5XX_RBBM_STATUS3, &status3);
+	kgsl_regread(device, A5XX_RBBM_INT_0_STATUS, &intstatus);
+	kgsl_regread(device, A5XX_CP_HW_FAULT, &hwfault);
+
+	dev_err(device->dev,
+		"rb=%d pos=%X/%X rbbm_status=%8.8X/%8.8X int_0_status=%8.8X\n",
+		adreno_dev->cur_rb->id, rptr, wptr, status, status3, intstatus);
+
+	dev_err(device->dev, " hwfault=%8.8X\n", hwfault);
+
+	kgsl_device_snapshot(device, NULL);
+}
+
+static void a530_efuse_leakage(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int row0, row2;
+	unsigned int multiplier, gfx_active, leakage_pwr_on, coeff;
+
+	adreno_efuse_read_u32(adreno_dev,
+		A530_QFPROM_RAW_PTE_ROW0_MSB, &row0);
+
+	adreno_efuse_read_u32(adreno_dev,
+		A530_QFPROM_RAW_PTE_ROW2_MSB, &row2);
+
+	multiplier = (row0 >> 1) & 0x3;
+	gfx_active = (row2 >> 2) & 0xFF;
+
+	if (of_property_read_u32(device->pdev->dev.of_node,
+		"qcom,base-leakage-coefficient", &coeff))
+		return;
+
+	leakage_pwr_on = gfx_active * (1 << multiplier);
+
+	adreno_dev->lm_leakage = (leakage_pwr_on << 16) |
+		((leakage_pwr_on * coeff) / 100);
+}
+
+static void a530_efuse_speed_bin(struct adreno_device *adreno_dev)
+{
+	unsigned int val;
+	unsigned int speed_bin[3];
+	struct kgsl_device *device = &adreno_dev->dev;
+
+	if (of_property_read_u32_array(device->pdev->dev.of_node,
+		"qcom,gpu-speed-bin", speed_bin, 3))
+		return;
+
+	adreno_efuse_read_u32(adreno_dev, speed_bin[0], &val);
+
+	adreno_dev->speed_bin = (val & speed_bin[1]) >> speed_bin[2];
+}
+
+static const struct {
+	int (*check)(struct adreno_device *adreno_dev);
+	void (*func)(struct adreno_device *adreno_dev);
+} a5xx_efuse_funcs[] = {
+	{ adreno_is_a530, a530_efuse_leakage },
+	{ adreno_is_a530, a530_efuse_speed_bin },
+	{ adreno_is_a505, a530_efuse_speed_bin },
+};
+
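+/*
+ * a5xx_check_features() - Map the efuse block, run every efuse handler
+ * whose target check matches, then unmap the efuse block.
+ */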
+static void a5xx_check_features(struct adreno_device *adreno_dev)
+{
+	unsigned int i;
+
+	if (adreno_efuse_map(adreno_dev))
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(a5xx_efuse_funcs); i++) {
+		if (a5xx_efuse_funcs[i].check(adreno_dev))
+			a5xx_efuse_funcs[i].func(adreno_dev);
+	}
+
+	adreno_efuse_unmap(adreno_dev);
+}
+
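+/*
+ * a5xx_platform_setup() - Per-target setup: adjust snapshot section
+ * sizes and the VBIF XIN halt mask, compute the SP local/private memory
+ * addresses above GMEM, and read the efuses for leakage and speed bin.
+ */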
+static void a5xx_platform_setup(struct adreno_device *adreno_dev)
+{
+	uint64_t addr;
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+
+	if (adreno_is_a505_or_a506(adreno_dev)) {
+		gpudev->snapshot_data->sect_sizes->cp_meq = 32;
+		gpudev->snapshot_data->sect_sizes->cp_merciu = 1024;
+		gpudev->snapshot_data->sect_sizes->roq = 256;
+
+		/* A505 & A506 having 3 XIN ports in VBIF */
+		gpudev->vbif_xin_halt_ctrl0_mask =
+				A510_VBIF_XIN_HALT_CTRL0_MASK;
+	} else if (adreno_is_a510(adreno_dev)) {
+		gpudev->snapshot_data->sect_sizes->cp_meq = 32;
+		gpudev->snapshot_data->sect_sizes->cp_merciu = 32;
+		gpudev->snapshot_data->sect_sizes->roq = 256;
+
+		/* A510 has 3 XIN ports in VBIF */
+		gpudev->vbif_xin_halt_ctrl0_mask =
+				A510_VBIF_XIN_HALT_CTRL0_MASK;
+	} else if (adreno_is_a540(adreno_dev)) {
+		gpudev->snapshot_data->sect_sizes->cp_merciu = 1024;
+	}
+
+	/* Calculate SP local and private mem addresses */
+	addr = ALIGN(ADRENO_UCHE_GMEM_BASE + adreno_dev->gmem_size, SZ_64K);
+	adreno_dev->sp_local_gpuaddr = addr;
+	adreno_dev->sp_pvt_gpuaddr = addr + SZ_64K;
+
+	/* Setup defaults that might get changed by the fuse bits */
+	adreno_dev->lm_leakage = A530_DEFAULT_LEAKAGE;
+	adreno_dev->speed_bin = 0;
+
+	/* Check efuse bits for various capabilities */
+	a5xx_check_features(adreno_dev);
+}
+
+static void a5xx_critical_packet_destroy(struct adreno_device *adreno_dev)
+{
+	kgsl_free_global(&adreno_dev->dev, &crit_pkts);
+	kgsl_free_global(&adreno_dev->dev, &crit_pkts_refbuf1);
+	kgsl_free_global(&adreno_dev->dev, &crit_pkts_refbuf2);
+	kgsl_free_global(&adreno_dev->dev, &crit_pkts_refbuf3);
+
+	kgsl_sharedmem_free(&crit_pkts_refbuf0);
+
+}
+
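+/*
+ * _do_fixup() - Patch the GPU addresses of the reference buffers (plus
+ * each fixup's memory offset) into the critical packet stream at the
+ * lo/hi dword offsets listed in the fixup table.
+ */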
+static void _do_fixup(const struct adreno_critical_fixup *fixups, int count,
+		uint64_t *gpuaddrs, unsigned int *buffer)
+{
+	int i;
+
+	for (i = 0; i < count; i++) {
+		buffer[fixups[i].lo_offset] =
+			lower_32_bits(gpuaddrs[fixups[i].buffer]) |
+			fixups[i].mem_offset;
+
+		buffer[fixups[i].hi_offset] =
+			upper_32_bits(gpuaddrs[fixups[i].buffer]);
+	}
+}
+
+static int a5xx_critical_packet_construct(struct adreno_device *adreno_dev)
+{
+
+	unsigned int *cmds;
+	uint64_t gpuaddrs[CRITICAL_PACKET_MAX];
+	int ret;
+
+	ret = kgsl_allocate_global(&adreno_dev->dev,
+					&crit_pkts, PAGE_SIZE,
+					KGSL_MEMFLAGS_GPUREADONLY,
+					0, "crit_pkts");
+	if (ret)
+		return ret;
+
+	ret = kgsl_allocate_user(&adreno_dev->dev, &crit_pkts_refbuf0,
+		PAGE_SIZE, KGSL_MEMFLAGS_SECURE);
+	if (ret)
+		return ret;
+
+	kgsl_add_global_secure_entry(&adreno_dev->dev,
+					&crit_pkts_refbuf0);
+
+	ret = kgsl_allocate_global(&adreno_dev->dev,
+					&crit_pkts_refbuf1,
+					PAGE_SIZE, 0, 0, "crit_pkts_refbuf1");
+	if (ret)
+		return ret;
+
+	ret = kgsl_allocate_global(&adreno_dev->dev,
+					&crit_pkts_refbuf2,
+					PAGE_SIZE, 0, 0, "crit_pkts_refbuf2");
+	if (ret)
+		return ret;
+
+	ret = kgsl_allocate_global(&adreno_dev->dev,
+					&crit_pkts_refbuf3,
+					PAGE_SIZE, 0, 0, "crit_pkts_refbuf3");
+	if (ret)
+		return ret;
+
+	cmds = crit_pkts.hostptr;
+
+	gpuaddrs[CRITICAL_PACKET0] = crit_pkts_refbuf0.gpuaddr;
+	gpuaddrs[CRITICAL_PACKET1] = crit_pkts_refbuf1.gpuaddr;
+	gpuaddrs[CRITICAL_PACKET2] = crit_pkts_refbuf2.gpuaddr;
+	gpuaddrs[CRITICAL_PACKET3] = crit_pkts_refbuf3.gpuaddr;
+
+	crit_pkts_dwords = ARRAY_SIZE(_a5xx_critical_pkts);
+
+	memcpy(cmds, _a5xx_critical_pkts, crit_pkts_dwords << 2);
+
+	_do_fixup(critical_pkt_fixups, ARRAY_SIZE(critical_pkt_fixups),
+		gpuaddrs, cmds);
+
+	cmds = crit_pkts_refbuf1.hostptr;
+	memcpy(cmds, _a5xx_critical_pkts_mem01,
+			ARRAY_SIZE(_a5xx_critical_pkts_mem01) << 2);
+
+	cmds = crit_pkts_refbuf2.hostptr;
+	memcpy(cmds, _a5xx_critical_pkts_mem02,
+			ARRAY_SIZE(_a5xx_critical_pkts_mem02) << 2);
+
+	cmds = crit_pkts_refbuf3.hostptr;
+	memcpy(cmds, _a5xx_critical_pkts_mem03,
+			ARRAY_SIZE(_a5xx_critical_pkts_mem03) << 2);
+
+	_do_fixup(critical_pkt_mem03_fixups,
+		ARRAY_SIZE(critical_pkt_mem03_fixups), gpuaddrs, cmds);
+
+	critical_packet_constructed = 1;
+
+	return 0;
+}
+
+static void a5xx_init(struct adreno_device *adreno_dev)
+{
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_GPMU))
+		INIT_WORK(&adreno_dev->gpmu_work, a5xx_gpmu_reset);
+
+	INIT_WORK(&adreno_dev->irq_storm_work, a5xx_irq_storm_worker);
+
+	if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_CRITICAL_PACKETS))
+		a5xx_critical_packet_construct(adreno_dev);
+
+	a5xx_crashdump_init(adreno_dev);
+}
+
+static void a5xx_remove(struct adreno_device *adreno_dev)
+{
+	if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_CRITICAL_PACKETS))
+		a5xx_critical_packet_destroy(adreno_dev);
+}
+
+/**
+ * a5xx_protect_init() - Initializes register protection on a5xx
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Performs register writes to enable protected access to sensitive
+ * registers
+ */
+static void a5xx_protect_init(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int index = 0;
+	struct kgsl_protected_registers *iommu_regs;
+
+	/* enable access protection to privileged registers */
+	kgsl_regwrite(device, A5XX_CP_PROTECT_CNTL, 0x00000007);
+
+	/* RBBM registers */
+	adreno_set_protected_registers(adreno_dev, &index, 0x4, 2);
+	adreno_set_protected_registers(adreno_dev, &index, 0x8, 3);
+	adreno_set_protected_registers(adreno_dev, &index, 0x10, 4);
+	adreno_set_protected_registers(adreno_dev, &index, 0x20, 5);
+	adreno_set_protected_registers(adreno_dev, &index, 0x40, 6);
+	adreno_set_protected_registers(adreno_dev, &index, 0x80, 6);
+
+	/* Content protection registers */
+	adreno_set_protected_registers(adreno_dev, &index,
+		   A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO, 4);
+	adreno_set_protected_registers(adreno_dev, &index,
+		   A5XX_RBBM_SECVID_TRUST_CNTL, 1);
+
+	/* CP registers */
+	adreno_set_protected_registers(adreno_dev, &index, 0x800, 6);
+	adreno_set_protected_registers(adreno_dev, &index, 0x840, 3);
+	adreno_set_protected_registers(adreno_dev, &index, 0x880, 5);
+	adreno_set_protected_registers(adreno_dev, &index, 0x0AA0, 0);
+
+	/* RB registers */
+	adreno_set_protected_registers(adreno_dev, &index, 0xCC0, 0);
+	adreno_set_protected_registers(adreno_dev, &index, 0xCF0, 1);
+
+	/* VPC registers */
+	adreno_set_protected_registers(adreno_dev, &index, 0xE68, 3);
+	adreno_set_protected_registers(adreno_dev, &index, 0xE70, 4);
+
+	/* UCHE registers */
+	adreno_set_protected_registers(adreno_dev, &index, 0xE80, ilog2(16));
+
+	/* SMMU registers */
+	iommu_regs = kgsl_mmu_get_prot_regs(&device->mmu);
+	if (iommu_regs)
+		adreno_set_protected_registers(adreno_dev, &index,
+				iommu_regs->base, iommu_regs->range);
+}
+
+/*
+ * a5xx_is_sptp_idle() - A530 SP/TP/RAC should be power collapsed to be
+ * considered idle
+ * @adreno_dev: The adreno_device pointer
+ */
+static bool a5xx_is_sptp_idle(struct adreno_device *adreno_dev)
+{
+	unsigned int reg;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	/* If feature is not supported or enabled, no worry */
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_SPTP_PC) ||
+		!test_bit(ADRENO_SPTP_PC_CTRL, &adreno_dev->pwrctrl_flag))
+		return true;
+	kgsl_regread(device, A5XX_GPMU_SP_PWR_CLK_STATUS, &reg);
+	if (reg & BIT(20))
+		return false;
+	kgsl_regread(device, A5XX_GPMU_RBCCU_PWR_CLK_STATUS, &reg);
+	return !(reg & BIT(20));
+}
+
+/*
+ * _poll_gdsc_status() - Poll the GDSC status register
+ * @adreno_dev: The adreno device pointer
+ * @status_reg: Offset of the status register
+ * @status_value: The expected bit value
+ *
+ * Poll the status register till the power-on bit is equal to the
+ * expected value or the max retries are exceeded.
+ */
+static int _poll_gdsc_status(struct adreno_device *adreno_dev,
+				unsigned int status_reg,
+				unsigned int status_value)
+{
+	unsigned int reg, retry = PWR_RETRY;
+
+	/* Bit 20 is the power on bit of SPTP and RAC GDSC status register */
+	do {
+		udelay(1);
+		kgsl_regread(KGSL_DEVICE(adreno_dev), status_reg, &reg);
+	} while (((reg & BIT(20)) != (status_value << 20)) && retry--);
+	if ((reg & BIT(20)) != (status_value << 20))
+		return -ETIMEDOUT;
+	return 0;
+}
+
+static void a5xx_restore_isense_regs(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int reg, i, ramp = GPMU_ISENSE_SAVE;
+	static unsigned int isense_regs[6] = {0xFFFF}, isense_reg_addr[] = {
+		A5XX_GPU_CS_DECIMAL_ALIGN,
+		A5XX_GPU_CS_SENSOR_PARAM_CORE_1,
+		A5XX_GPU_CS_SENSOR_PARAM_CORE_2,
+		A5XX_GPU_CS_SW_OV_FUSE_EN,
+		A5XX_GPU_CS_ENDPOINT_CALIBRATION_DONE,
+		A5XX_GPMU_TEMP_SENSOR_CONFIG};
+
+	if (!adreno_is_a540(adreno_dev))
+		return;
+
+	/* read signature */
+	kgsl_regread(device, ramp++, &reg);
+
+	if (reg == 0xBABEFACE) {
+		/* store memory locations in buffer */
+		for (i = 0; i < ARRAY_SIZE(isense_regs); i++)
+			kgsl_regread(device, ramp + i, isense_regs + i);
+
+		/* clear signature */
+		kgsl_regwrite(device, GPMU_ISENSE_SAVE, 0x0);
+	}
+
+	/* if we never stored memory locations - do nothing */
+	if (isense_regs[0] == 0xFFFF)
+		return;
+
+	/* restore registers from memory */
+	for (i = 0; i < ARRAY_SIZE(isense_reg_addr); i++)
+		kgsl_regwrite(device, isense_reg_addr[i], isense_regs[i]);
+}
+
+/*
+ * a5xx_regulator_enable() - Enable any necessary HW regulators
+ * @adreno_dev: The adreno device pointer
+ *
+ * Some HW blocks may need their regulators explicitly enabled
+ * on a restart.  Clocks must be on during this call.
+ */
+static int a5xx_regulator_enable(struct adreno_device *adreno_dev)
+{
+	int ret;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (!(adreno_is_a530(adreno_dev) || adreno_is_a540(adreno_dev))) {
+		/* Halt the sp_input_clk at HM level */
+		kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL, 0x00000055);
+		a5xx_hwcg_set(adreno_dev, true);
+		/* Turn on sp_input_clk at HM level */
+		kgsl_regrmw(device, A5XX_RBBM_CLOCK_CNTL, 3, 0);
+		return 0;
+	}
+
+	/*
+	 * Turn on smaller power domain first to reduce voltage droop.
+	 * Set the default register values; set SW_COLLAPSE to 0.
+	 */
+	kgsl_regwrite(device, A5XX_GPMU_RBCCU_POWER_CNTL, 0x778000);
+	/* Insert a delay between RAC and SPTP GDSC to reduce voltage droop */
+	udelay(3);
+	ret = _poll_gdsc_status(adreno_dev, A5XX_GPMU_RBCCU_PWR_CLK_STATUS, 1);
+	if (ret) {
+		KGSL_PWR_ERR(device, "RBCCU GDSC enable failed\n");
+		return ret;
+	}
+
+	kgsl_regwrite(device, A5XX_GPMU_SP_POWER_CNTL, 0x778000);
+	ret = _poll_gdsc_status(adreno_dev, A5XX_GPMU_SP_PWR_CLK_STATUS, 1);
+	if (ret) {
+		KGSL_PWR_ERR(device, "SPTP GDSC enable failed\n");
+		return ret;
+	}
+
+	/* Disable SP clock */
+	kgsl_regrmw(device, A5XX_GPMU_GPMU_SP_CLOCK_CONTROL,
+		CNTL_IP_CLK_ENABLE, 0);
+	/* Enable hardware clockgating */
+	a5xx_hwcg_set(adreno_dev, true);
+	/* Enable SP clock */
+	kgsl_regrmw(device, A5XX_GPMU_GPMU_SP_CLOCK_CONTROL,
+		CNTL_IP_CLK_ENABLE, 1);
+
+	a5xx_restore_isense_regs(adreno_dev);
+	return 0;
+}
+
+/*
+ * a5xx_regulator_disable() - Disable any necessary HW regulators
+ * @adreno_dev: The adreno device pointer
+ *
+ * Some HW blocks may need their regulators explicitly disabled
+ * on a power down to prevent current spikes.  Clocks must be on
+ * during this call.
+ */
+static void a5xx_regulator_disable(struct adreno_device *adreno_dev)
+{
+	unsigned int reg;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	/* If feature is not supported or not enabled */
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_SPTP_PC) ||
+		!test_bit(ADRENO_SPTP_PC_CTRL, &adreno_dev->pwrctrl_flag)) {
+		/* Set the default register values; set SW_COLLAPSE to 1 */
+		kgsl_regwrite(device, A5XX_GPMU_SP_POWER_CNTL, 0x778001);
+		/*
+		 * Insert a delay between SPTP and RAC GDSC to reduce voltage
+		 * droop.
+		 */
+		udelay(3);
+		if (_poll_gdsc_status(adreno_dev,
+					A5XX_GPMU_SP_PWR_CLK_STATUS, 0))
+			KGSL_PWR_WARN(device, "SPTP GDSC disable failed\n");
+
+		kgsl_regwrite(device, A5XX_GPMU_RBCCU_POWER_CNTL, 0x778001);
+		if (_poll_gdsc_status(adreno_dev,
+					A5XX_GPMU_RBCCU_PWR_CLK_STATUS, 0))
+			KGSL_PWR_WARN(device, "RBCCU GDSC disable failed\n");
+	} else if (test_bit(ADRENO_DEVICE_GPMU_INITIALIZED,
+			&adreno_dev->priv)) {
+		/* GPMU firmware is supposed to turn off SPTP & RAC GDSCs. */
+		kgsl_regread(device, A5XX_GPMU_SP_PWR_CLK_STATUS, &reg);
+		if (reg & BIT(20))
+			KGSL_PWR_WARN(device, "SPTP GDSC is not disabled\n");
+		kgsl_regread(device, A5XX_GPMU_RBCCU_PWR_CLK_STATUS, &reg);
+		if (reg & BIT(20))
+			KGSL_PWR_WARN(device, "RBCCU GDSC is not disabled\n");
+		/*
+		 * GPMU firmware is supposed to set GMEM to non-retention.
+		 * Bit 14 is the memory core force on bit.
+		 */
+		kgsl_regread(device, A5XX_GPMU_RBCCU_CLOCK_CNTL, &reg);
+		if (reg & BIT(14))
+			KGSL_PWR_WARN(device, "GMEM is forced on\n");
+	}
+
+	if (adreno_is_a530(adreno_dev)) {
+		/* Reset VBIF before PC to avoid popping bogus FIFO entries */
+		kgsl_regwrite(device, A5XX_RBBM_BLOCK_SW_RESET_CMD,
+			0x003C0000);
+		kgsl_regwrite(device, A5XX_RBBM_BLOCK_SW_RESET_CMD, 0);
+	}
+}
+
+/*
+ * a5xx_enable_pc() - Enable the GPMU based power collapse of the SPTP and RAC
+ * blocks
+ * @adreno_dev: The adreno device pointer
+ */
+static void a5xx_enable_pc(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_SPTP_PC) ||
+		!test_bit(ADRENO_SPTP_PC_CTRL, &adreno_dev->pwrctrl_flag))
+		return;
+
+	kgsl_regwrite(device, A5XX_GPMU_PWR_COL_INTER_FRAME_CTRL, 0x0000007F);
+	kgsl_regwrite(device, A5XX_GPMU_PWR_COL_BINNING_CTRL, 0);
+	kgsl_regwrite(device, A5XX_GPMU_PWR_COL_INTER_FRAME_HYST, 0x000A0080);
+	kgsl_regwrite(device, A5XX_GPMU_PWR_COL_STAGGER_DELAY, 0x00600040);
+
+	trace_adreno_sp_tp((unsigned long) __builtin_return_address(0));
+}
+
+/*
+ * The maximum payload of a type4 packet is the maximum packet size minus
+ * one dword for the packet header
+ */
+#define TYPE4_MAX_PAYLOAD (PM4_TYPE4_PKT_SIZE_MAX - 1)
+
+static int _gpmu_create_load_cmds(struct adreno_device *adreno_dev,
+	uint32_t *ucode, uint32_t size)
+{
+	uint32_t *start, *cmds;
+	uint32_t offset = 0;
+	uint32_t cmds_size = size;
+
+	/* Add a header dword for each type4 packet */
+	cmds_size += (size / TYPE4_MAX_PAYLOAD) + 1;
+
+	/* Add 4 dwords for the protected mode */
+	cmds_size += 4;
+
+	if (adreno_dev->gpmu_cmds != NULL)
+		return 0;
+
+	adreno_dev->gpmu_cmds = kmalloc(cmds_size << 2, GFP_KERNEL);
+	if (adreno_dev->gpmu_cmds == NULL)
+		return -ENOMEM;
+
+	cmds = adreno_dev->gpmu_cmds;
+	start = cmds;
+
+	/* Turn CP protection OFF */
+	*cmds++ = cp_type7_packet(CP_SET_PROTECTED_MODE, 1);
+	*cmds++ = 0;
+
+	/*
+	 * Prebuild the cmd stream to send to the GPU to load
+	 * the GPMU firmware
+	 */
+	while (size > 0) {
+		int tmp_size = size;
+
+		if (size >= TYPE4_MAX_PAYLOAD)
+			tmp_size = TYPE4_MAX_PAYLOAD;
+
+		*cmds++ = cp_type4_packet(
+				A5XX_GPMU_INST_RAM_BASE + offset,
+				tmp_size);
+
+		memcpy(cmds, &ucode[offset], tmp_size << 2);
+
+		cmds += tmp_size;
+		offset += tmp_size;
+		size -= tmp_size;
+	}
+
+	/* Turn CP protection ON */
+	*cmds++ = cp_type7_packet(CP_SET_PROTECTED_MODE, 1);
+	*cmds++ = 1;
+
+	adreno_dev->gpmu_cmds_size = (size_t) (cmds - start);
+
+	return 0;
+}
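
The buffer sizing above is easier to see in isolation. The following stand-alone sketch (not driver code; PM4_TYPE4_PKT_SIZE_MAX is given an assumed value purely for illustration) mirrors the same worst-case dword count: one type4 header per payload chunk plus the four dwords that toggle protected mode off and back on.

#include <stdint.h>
#include <stdio.h>

#define PM4_TYPE4_PKT_SIZE_MAX	128	/* assumed value, for illustration only */
#define TYPE4_MAX_PAYLOAD	(PM4_TYPE4_PKT_SIZE_MAX - 1)

/* Worst-case dword count for a GPMU load stream of 'size' ucode dwords */
static uint32_t gpmu_load_cmds_size(uint32_t size)
{
	uint32_t cmds = size;			/* the ucode itself */

	cmds += (size / TYPE4_MAX_PAYLOAD) + 1;	/* one type4 header per chunk */
	cmds += 4;				/* protected mode off + on */
	return cmds;
}

int main(void)
{
	/* e.g. 4096 ucode dwords -> 4096 + 33 + 4 = 4133 command dwords */
	printf("%u\n", gpmu_load_cmds_size(4096));
	return 0;
}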
+
+
+/*
+ * _load_gpmu_firmware() - Load the ucode into the GPMU RAM
+ * @adreno_dev: Pointer to adreno device
+ */
+static int _load_gpmu_firmware(struct adreno_device *adreno_dev)
+{
+	uint32_t *data;
+	const struct firmware *fw = NULL;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct adreno_gpu_core *gpucore = adreno_dev->gpucore;
+	uint32_t *cmds, cmd_size;
+	int ret = -EINVAL;
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_GPMU))
+		return 0;
+
+	/* gpmu fw already saved and verified so do nothing new */
+	if (adreno_dev->gpmu_cmds_size != 0)
+		return 0;
+
+	if (gpucore->gpmufw_name == NULL)
+		return 0;
+
+	ret = request_firmware(&fw, gpucore->gpmufw_name, device->dev);
+	if (ret || fw == NULL) {
+		KGSL_CORE_ERR("request_firmware (%s) failed: %d\n",
+				gpucore->gpmufw_name, ret);
+		return ret;
+	}
+
+	data = (uint32_t *)fw->data;
+
+	if (data[0] >= (fw->size / sizeof(uint32_t)) || data[0] < 2)
+		goto err;
+
+	if (data[1] != GPMU_FIRMWARE_ID)
+		goto err;
+	ret = _read_fw2_block_header(&data[2],
+		GPMU_FIRMWARE_ID,
+		adreno_dev->gpucore->gpmu_major,
+		adreno_dev->gpucore->gpmu_minor);
+	if (ret)
+		goto err;
+
+	cmds = data + data[2] + 3;
+	cmd_size = data[0] - data[2] - 2;
+
+	if (cmd_size > GPMU_INST_RAM_SIZE) {
+		KGSL_CORE_ERR(
+			"GPMU firmware block size is larger than RAM size\n");
+		goto err;
+	}
+
+	/* Everything is cool, so create some commands */
+	ret = _gpmu_create_load_cmds(adreno_dev, cmds, cmd_size);
+err:
+	if (fw)
+		release_firmware(fw);
+
+	return ret;
+}
+
+static int _gpmu_send_init_cmds(struct adreno_device *adreno_dev)
+{
+	struct adreno_ringbuffer *rb = adreno_dev->cur_rb;
+	uint32_t *cmds;
+	uint32_t size = adreno_dev->gpmu_cmds_size;
+	int ret;
+
+	if (size == 0 || adreno_dev->gpmu_cmds == NULL)
+		return -EINVAL;
+
+	cmds = adreno_ringbuffer_allocspace(rb, size);
+	if (IS_ERR(cmds))
+		return PTR_ERR(cmds);
+	if (cmds == NULL)
+		return -ENOSPC;
+
+	/* Copy the prebuilt GPMU firmware load commands into the RB */
+	memcpy(cmds, adreno_dev->gpmu_cmds, size << 2);
+
+	ret = adreno_ringbuffer_submit_spin(rb, NULL, 2000);
+	if (ret != 0)
+		spin_idle_debug(&adreno_dev->dev,
+				"gpmu initialization failed to idle\n");
+
+	return ret;
+}
+
+/*
+ * a5xx_gpmu_start() - Initialize and start the GPMU
+ * @adreno_dev: Pointer to adreno device
+ *
+ * Load the GPMU microcode, set up any features such as hardware clock gating
+ * or IFPC, and take the GPMU out of reset.
+ */
+static int a5xx_gpmu_start(struct adreno_device *adreno_dev)
+{
+	int ret;
+	unsigned int reg, retry = GPMU_FW_INIT_RETRY;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_GPMU))
+		return 0;
+
+	ret = _gpmu_send_init_cmds(adreno_dev);
+	if (ret)
+		return ret;
+
+	if (adreno_is_a530(adreno_dev)) {
+		/* GPMU clock gating setup */
+		kgsl_regwrite(device, A5XX_GPMU_WFI_CONFIG, 0x00004014);
+	}
+	/* Kick off GPMU firmware */
+	kgsl_regwrite(device, A5XX_GPMU_CM3_SYSRESET, 0);
+	/*
+	 * The hardware team estimates the GPMU firmware initialization
+	 * latency at about 3000 cycles, which is roughly 5 to 24 usec.
+	 */
+	do {
+		udelay(1);
+		kgsl_regread(device, A5XX_GPMU_GENERAL_0, &reg);
+	} while ((reg != 0xBABEFACE) && retry--);
+
+	if (reg != 0xBABEFACE) {
+		KGSL_CORE_ERR("GPMU firmware initialization timed out\n");
+		return -ETIMEDOUT;
+	}
+
+	if (!adreno_is_a530(adreno_dev)) {
+		kgsl_regread(device, A5XX_GPMU_GENERAL_1, &reg);
+
+		if (reg) {
+			KGSL_CORE_ERR(
+				"GPMU firmware initialization failed: %d\n",
+				reg);
+			return -EIO;
+		}
+	}
+	set_bit(ADRENO_DEVICE_GPMU_INITIALIZED, &adreno_dev->priv);
+	/*
+	 *  We are in AWARE state and IRQ line from GPU to host is
+	 *  disabled.
+	 *  Read pending GPMU interrupts and clear GPMU_RBBM_INTR_INFO.
+	 */
+	kgsl_regread(device, A5XX_GPMU_RBBM_INTR_INFO, &reg);
+	/*
+	 * Clear the GPMU firmware interrupt in RBBM if any GPMU
+	 * interrupts are pending.
+	 */
+	if (reg)
+		kgsl_regwrite(device,
+			A5XX_RBBM_INT_CLEAR_CMD,
+			1 << A5XX_INT_GPMU_FIRMWARE);
+	return ret;
+}
+
+struct kgsl_hwcg_reg {
+	unsigned int off;
+	unsigned int val;
+};
+
+static const struct kgsl_hwcg_reg a50x_hwcg_regs[] = {
+	{A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
+	{A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
+	{A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
+	{A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
+	{A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
+	{A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
+	{A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
+	{A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
+	{A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
+	{A5XX_RBBM_CLOCK_HYST_UCHE, 0x00FFFFF4},
+	{A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
+	{A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
+	{A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
+	{A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
+	{A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
+	{A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
+	{A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
+	{A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
+	{A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
+	{A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
+	{A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
+	{A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
+	{A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
+	{A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
+	{A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
+	{A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
+	{A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
+	{A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
+	{A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
+};
+
+static const struct kgsl_hwcg_reg a510_hwcg_regs[] = {
+	{A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
+	{A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
+	{A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
+	{A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
+	{A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
+	{A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
+	{A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
+	{A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
+	{A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
+	{A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
+	{A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
+	{A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
+	{A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
+	{A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
+	{A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
+	{A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
+	{A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
+	{A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
+	{A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
+	{A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
+	{A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
+	{A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
+	{A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
+	{A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
+	{A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
+	{A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
+	{A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
+	{A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
+	{A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
+	{A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
+	{A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
+	{A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
+	{A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
+	{A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
+	{A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
+	{A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
+	{A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
+	{A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
+	{A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
+};
+
+static const struct kgsl_hwcg_reg a530_hwcg_regs[] = {
+	{A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
+	{A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
+	{A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
+	{A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
+	{A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
+	{A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
+	{A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
+	{A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
+	{A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
+	{A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
+	{A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
+	{A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
+	{A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
+	{A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
+	{A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
+	{A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
+	{A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
+	{A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
+	{A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
+	{A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
+	{A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
+	{A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
+	{A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
+	{A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
+	{A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
+	{A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
+	{A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
+	{A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
+	{A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
+	{A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
+	{A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
+	{A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
+	{A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
+	{A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
+	{A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
+	{A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
+	{A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
+	{A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
+	{A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
+	{A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
+	{A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
+	{A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
+	{A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
+	{A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
+	{A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
+	{A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
+	{A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
+	{A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
+	{A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
+	{A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
+	{A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
+	{A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
+	{A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
+	{A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
+	{A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
+	{A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
+	{A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
+	{A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
+	{A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
+	{A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
+	{A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
+};
+
+static const struct kgsl_hwcg_reg a540_hwcg_regs[] = {
+	{A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
+	{A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
+	{A5XX_RBBM_CLOCK_CNTL_SP2, 0x02222222},
+	{A5XX_RBBM_CLOCK_CNTL_SP3, 0x02222222},
+	{A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
+	{A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
+	{A5XX_RBBM_CLOCK_CNTL2_SP2, 0x02222220},
+	{A5XX_RBBM_CLOCK_CNTL2_SP3, 0x02222220},
+	{A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
+	{A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
+	{A5XX_RBBM_CLOCK_HYST_SP2, 0x0000F3CF},
+	{A5XX_RBBM_CLOCK_HYST_SP3, 0x0000F3CF},
+	{A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
+	{A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
+	{A5XX_RBBM_CLOCK_DELAY_SP2, 0x00000080},
+	{A5XX_RBBM_CLOCK_DELAY_SP3, 0x00000080},
+	{A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL_TP2, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL_TP3, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL2_TP2, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL2_TP3, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
+	{A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
+	{A5XX_RBBM_CLOCK_CNTL3_TP2, 0x00002222},
+	{A5XX_RBBM_CLOCK_CNTL3_TP3, 0x00002222},
+	{A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST_TP2, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST_TP3, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST2_TP2, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST2_TP3, 0x77777777},
+	{A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
+	{A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
+	{A5XX_RBBM_CLOCK_HYST3_TP2, 0x00007777},
+	{A5XX_RBBM_CLOCK_HYST3_TP3, 0x00007777},
+	{A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY_TP2, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY_TP3, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY2_TP2, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY2_TP3, 0x11111111},
+	{A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
+	{A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
+	{A5XX_RBBM_CLOCK_DELAY3_TP2, 0x00001111},
+	{A5XX_RBBM_CLOCK_DELAY3_TP3, 0x00001111},
+	{A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
+	{A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
+	{A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
+	{A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL_RB2, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL_RB3, 0x22222222},
+	{A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
+	{A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
+	{A5XX_RBBM_CLOCK_CNTL2_RB2, 0x00222222},
+	{A5XX_RBBM_CLOCK_CNTL2_RB3, 0x00222222},
+	{A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
+	{A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
+	{A5XX_RBBM_CLOCK_CNTL_CCU2, 0x00022220},
+	{A5XX_RBBM_CLOCK_CNTL_CCU3, 0x00022220},
+	{A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
+	{A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
+	{A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
+	{A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
+	{A5XX_RBBM_CLOCK_HYST_RB_CCU2, 0x04040404},
+	{A5XX_RBBM_CLOCK_HYST_RB_CCU3, 0x04040404},
+	{A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
+	{A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
+	{A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
+	{A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_2, 0x00000002},
+	{A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_3, 0x00000002},
+	{A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
+	{A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
+	{A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
+	{A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
+	{A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
+	{A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
+	{A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
+	{A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
+	{A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
+	{A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
+	{A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
+	{A5XX_RBBM_CLOCK_HYST_GPMU, 0x00000222},
+	{A5XX_RBBM_CLOCK_DELAY_GPMU, 0x00000770},
+	{A5XX_RBBM_CLOCK_HYST_GPMU, 0x00000004}
+};
+
+static const struct {
+	int (*devfunc)(struct adreno_device *adreno_dev);
+	const struct kgsl_hwcg_reg *regs;
+	unsigned int count;
+} a5xx_hwcg_registers[] = {
+	{ adreno_is_a540, a540_hwcg_regs, ARRAY_SIZE(a540_hwcg_regs) },
+	{ adreno_is_a530, a530_hwcg_regs, ARRAY_SIZE(a530_hwcg_regs) },
+	{ adreno_is_a510, a510_hwcg_regs, ARRAY_SIZE(a510_hwcg_regs) },
+	{ adreno_is_a505, a50x_hwcg_regs, ARRAY_SIZE(a50x_hwcg_regs) },
+	{ adreno_is_a506, a50x_hwcg_regs, ARRAY_SIZE(a50x_hwcg_regs) },
+};
+
+void a5xx_hwcg_set(struct adreno_device *adreno_dev, bool on)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct kgsl_hwcg_reg *regs;
+	int i, j;
+
+	if (!test_bit(ADRENO_HWCG_CTRL, &adreno_dev->pwrctrl_flag))
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(a5xx_hwcg_registers); i++) {
+		if (a5xx_hwcg_registers[i].devfunc(adreno_dev))
+			break;
+	}
+
+	if (i == ARRAY_SIZE(a5xx_hwcg_registers))
+		return;
+
+	regs = a5xx_hwcg_registers[i].regs;
+
+	for (j = 0; j < a5xx_hwcg_registers[i].count; j++)
+		kgsl_regwrite(device, regs[j].off, on ? regs[j].val : 0);
+
+	/* enable top level HWCG */
+	kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL, on ? 0xAAA8AA00 : 0);
+	kgsl_regwrite(device, A5XX_RBBM_ISDB_CNT, on ? 0x00000182 : 0x00000180);
+}
+
+static int _read_fw2_block_header(uint32_t *header, uint32_t id,
+				uint32_t major, uint32_t minor)
+{
+	uint32_t header_size;
+	int i = 1;
+
+	if (header == NULL)
+		return -ENOMEM;
+
+	header_size = header[0];
+	/* Headers have limited size and always occur as pairs of words */
+	if (header_size > MAX_HEADER_SIZE || header_size % 2)
+		return -EINVAL;
+	/* Sequences must have an identifying id first thing in their header */
+	if (id == GPMU_SEQUENCE_ID) {
+		if (header[i] != HEADER_SEQUENCE ||
+			(header[i + 1] >= MAX_SEQUENCE_ID))
+			return -EINVAL;
+		i += 2;
+	}
+	for (; i < header_size; i += 2) {
+		switch (header[i]) {
+		/* Major Version */
+		case HEADER_MAJOR:
+			if ((major > header[i + 1]) &&
+				header[i + 1]) {
+				KGSL_CORE_ERR(
+					"GPMU major version mis-match %d, %d\n",
+					major, header[i + 1]);
+				return -EINVAL;
+			}
+			break;
+		case HEADER_MINOR:
+			if (minor > header[i + 1])
+				KGSL_CORE_ERR(
+					"GPMU minor version mis-match %d %d\n",
+					minor, header[i + 1]);
+			break;
+		case HEADER_DATE:
+		case HEADER_TIME:
+			break;
+		default:
+			KGSL_CORE_ERR("GPMU unknown header ID %d\n",
+					header[i]);
+		}
+	}
+	return 0;
+}
+
+/*
+ * Read in the register sequence file and save pointers to the
+ * necessary sequences.
+ *
+ * GPU sequence file format (one dword per field unless noted):
+ * Block 1 length (length dword field not inclusive)
+ * Block 1 type = Sequence = 3
+ * Block Header length (length dword field not inclusive)
+ * BH field ID = Sequence field ID
+ * BH field data = Sequence ID
+ * BH field ID
+ * BH field data
+ * ...
+ * Opcode 0 ID
+ * Opcode 0 data M words
+ * Opcode 1 ID
+ * Opcode 1 data N words
+ * ...
+ * Opcode X ID
+ * Opcode X data O words
+ * Block 2 length...
+ */
+static void _load_regfile(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	const struct firmware *fw;
+	uint32_t block_size = 0, block_total = 0, fw_size;
+	uint32_t *block;
+	int ret = -EINVAL;
+
+	if (!adreno_dev->gpucore->regfw_name)
+		return;
+
+	ret = request_firmware(&fw, adreno_dev->gpucore->regfw_name,
+			device->dev);
+	if (ret) {
+		KGSL_PWR_ERR(device, "request firmware failed %d, %s\n",
+				ret, adreno_dev->gpucore->regfw_name);
+		return;
+	}
+
+	fw_size = fw->size / sizeof(uint32_t);
+	/* A valid file is at least 6 dwords, see file description above */
+	if (fw_size < 6)
+		goto err;
+	block = (uint32_t *)fw->data;
+	/* All offset numbers calculated from file description */
+	while (block_total < fw_size) {
+		block_size = block[0];
+		if (block_size >= fw_size || block_size < 2)
+			goto err;
+		if (block[1] != GPMU_SEQUENCE_ID)
+			goto err;
+
+		/* For now ignore blocks other than the LM sequence */
+		if (block[4] == LM_SEQUENCE_ID) {
+			ret = _read_fw2_block_header(&block[2],
+				GPMU_SEQUENCE_ID,
+				adreno_dev->gpucore->lm_major,
+				adreno_dev->gpucore->lm_minor);
+			if (ret)
+				goto err;
+
+			adreno_dev->lm_fw = fw;
+			adreno_dev->lm_sequence = block + block[2] + 3;
+			adreno_dev->lm_size = block_size - block[2] - 2;
+		}
+		block_total += (block_size + 1);
+		block += (block_size + 1);
+	}
+	if (adreno_dev->lm_sequence)
+		return;
+
+err:
+	release_firmware(fw);
+	KGSL_PWR_ERR(device,
+		"Register file failed to load sz=%d bsz=%d header=%d\n",
+		fw_size, block_size, ret);
+}
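
To make the block walking in _load_regfile() above more concrete, here is a minimal stand-alone walker over the same layout; the demo data and the DEMO_SEQUENCE_ID value are invented for illustration (the real GPMU_SEQUENCE_ID and the LM-specific handling stay in the driver).

#include <stdint.h>
#include <stdio.h>

#define DEMO_SEQUENCE_ID 3	/* placeholder for GPMU_SEQUENCE_ID */

/*
 * Each block begins with its length (not counting the length dword
 * itself), then the block type, then a sub-header whose own length is
 * at block[2]; the next block starts at block + length + 1.
 */
static void walk_blocks(const uint32_t *data, uint32_t fw_size)
{
	const uint32_t *block = data;
	uint32_t total = 0;

	while (total < fw_size) {
		uint32_t len = block[0];

		if (len >= fw_size || len < 2 || block[1] != DEMO_SEQUENCE_ID)
			break;
		printf("block: %u dwords, header %u dwords\n", len, block[2]);
		total += len + 1;
		block += len + 1;
	}
}

int main(void)
{
	/* Two made-up blocks, just to exercise the walker */
	static const uint32_t demo[] = {
		5, DEMO_SEQUENCE_ID, 2, 0, 0, 0,
		4, DEMO_SEQUENCE_ID, 0, 0, 0,
	};

	walk_blocks(demo, sizeof(demo) / sizeof(demo[0]));
	return 0;
}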
+
+static int _execute_reg_sequence(struct adreno_device *adreno_dev,
+			uint32_t *opcode, uint32_t length)
+{
+	uint32_t *cur = opcode;
+	uint64_t reg, val;
+
+	/* todo double check the reg writes */
+	while ((cur - opcode) < length) {
+		switch (cur[0]) {
+		/* Write a 32 bit value to a 64 bit reg */
+		case 1:
+			reg = cur[2];
+			reg = (reg << 32) | cur[1];
+			kgsl_regwrite(KGSL_DEVICE(adreno_dev), reg, cur[3]);
+			cur += 4;
+			break;
+		/* Write a 64 bit value to a 64 bit reg */
+		case 2:
+			reg = cur[2];
+			reg = (reg << 32) | cur[1];
+			val = cur[4];
+			val = (val << 32) | cur[3];
+			kgsl_regwrite(KGSL_DEVICE(adreno_dev), reg, val);
+			cur += 5;
+			break;
+		/* Delay for X usec */
+		case 3:
+			udelay(cur[1]);
+			cur += 2;
+			break;
+		default:
+			return -EINVAL;
+		}
+	}
+	return 0;
+}
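
For reference, the opcode stream consumed by _execute_reg_sequence() can be decoded outside the driver as well. This sketch uses stand-in write/delay helpers and an invented sequence purely to show the three opcode layouts (4, 5 and 2 dwords respectively).

#include <stdint.h>
#include <stdio.h>

/* Stand-ins for the driver's register write and delay helpers */
static void demo_regwrite(uint64_t reg, uint64_t val)
{
	printf("write 0x%llx <- 0x%llx\n",
	       (unsigned long long)reg, (unsigned long long)val);
}

static void demo_udelay(uint32_t usec)
{
	printf("delay %u usec\n", (unsigned int)usec);
}

/* Invented sequence using the same opcode layout _execute_reg_sequence() reads */
static const uint32_t demo_sequence[] = {
	1, 0x1234, 0x0, 0xDEADBEEF,	/* op 1: 32-bit value, 64-bit reg addr */
	2, 0x5678, 0x0, 0x1, 0x0,	/* op 2: 64-bit value, 64-bit reg addr */
	3, 10,				/* op 3: delay for 10 usec */
};

int main(void)
{
	const uint32_t *cur = demo_sequence;
	const uint32_t *end = demo_sequence +
			sizeof(demo_sequence) / sizeof(demo_sequence[0]);

	while (cur < end) {
		switch (cur[0]) {
		case 1:
			demo_regwrite(((uint64_t)cur[2] << 32) | cur[1], cur[3]);
			cur += 4;
			break;
		case 2:
			demo_regwrite(((uint64_t)cur[2] << 32) | cur[1],
				      ((uint64_t)cur[4] << 32) | cur[3]);
			cur += 5;
			break;
		case 3:
			demo_udelay(cur[1]);
			cur += 2;
			break;
		default:
			return 1;
		}
	}
	return 0;
}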
+
+static uint32_t _write_voltage_table(struct adreno_device *adreno_dev,
+			unsigned int addr)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	int i;
+	struct dev_pm_opp *opp;
+	int levels = pwr->num_pwrlevels - 1;
+	unsigned int mvolt = 0;
+
+	kgsl_regwrite(device, addr++, adreno_dev->gpucore->max_power);
+	kgsl_regwrite(device, addr++, levels);
+
+	/* Write voltage in mV and frequency in MHz */
+	for (i = 0; i < levels; i++) {
+		opp = dev_pm_opp_find_freq_exact(&device->pdev->dev,
+				pwr->pwrlevels[i].gpu_freq, true);
+		/* dev_pm_opp_get_voltage() returns uV, convert to mV */
+		if (!IS_ERR(opp))
+			mvolt = dev_pm_opp_get_voltage(opp) / 1000;
+		kgsl_regwrite(device, addr++, mvolt);
+		kgsl_regwrite(device, addr++,
+				pwr->pwrlevels[i].gpu_freq / 1000000);
+	}
+	return (levels * 2 + 2);
+}
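
As a rough picture of the AGC payload that _write_voltage_table() builds (max power, level count, then a mV/MHz pair per level), the sketch below applies the same uV-to-mV and Hz-to-MHz conversions to an invented power-level table.

#include <stdint.h>
#include <stdio.h>

/* Invented power levels: frequency in Hz, OPP voltage in uV */
static const struct {
	uint32_t freq_hz;
	uint32_t volt_uv;
} demo_levels[] = {
	{ 600000000, 1000000 },
	{ 510000000,  900000 },
	{ 300000000,  800000 },
};

int main(void)
{
	size_t i, n = sizeof(demo_levels) / sizeof(demo_levels[0]);
	uint32_t dwords = 2;	/* max power + level count */

	for (i = 0; i < n; i++) {
		/* same conversions as _write_voltage_table(): uV->mV, Hz->MHz */
		printf("level %zu: %u mV, %u MHz\n", i,
		       demo_levels[i].volt_uv / 1000,
		       demo_levels[i].freq_hz / 1000000);
		dwords += 2;
	}
	printf("payload = %u dwords\n", dwords);	/* 3 levels -> 8 dwords */
	return 0;
}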
+
+static uint32_t lm_limit(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (adreno_dev->lm_limit)
+		return adreno_dev->lm_limit;
+
+	if (of_property_read_u32(device->pdev->dev.of_node, "qcom,lm-limit",
+		&adreno_dev->lm_limit))
+		adreno_dev->lm_limit = LM_DEFAULT_LIMIT;
+
+	return adreno_dev->lm_limit;
+}
+/*
+ * a530_lm_init() - Initialize LM/DPM on the GPMU
+ * @adreno_dev: The adreno device pointer
+ */
+static void a530_lm_init(struct adreno_device *adreno_dev)
+{
+	uint32_t length;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_LM) ||
+		!test_bit(ADRENO_LM_CTRL, &adreno_dev->pwrctrl_flag))
+		return;
+
+	/* If something was wrong with the sequence file, return */
+	if (adreno_dev->lm_sequence == NULL)
+		return;
+
+	/* Write LM registers including DPM ucode, coefficients, and config */
+	if (_execute_reg_sequence(adreno_dev, adreno_dev->lm_sequence,
+				adreno_dev->lm_size)) {
+		/* If the sequence is invalid, it's not getting better */
+		adreno_dev->lm_sequence = NULL;
+		KGSL_PWR_WARN(device,
+				"Invalid LM sequence\n");
+		return;
+	}
+
+	kgsl_regwrite(device, A5XX_GPMU_TEMP_SENSOR_ID,
+			adreno_dev->gpucore->gpmu_tsens);
+	kgsl_regwrite(device, A5XX_GPMU_DELTA_TEMP_THRESHOLD, 0x1);
+	kgsl_regwrite(device, A5XX_GPMU_TEMP_SENSOR_CONFIG, 0x1);
+
+	kgsl_regwrite(device, A5XX_GPMU_GPMU_VOLTAGE,
+			(0x80000000 | device->pwrctrl.active_pwrlevel));
+	/* use the leakage to set this value at runtime */
+	kgsl_regwrite(device, A5XX_GPMU_BASE_LEAKAGE,
+		adreno_dev->lm_leakage);
+
+	/* Enable the power threshold and set it to the LM limit */
+	kgsl_regwrite(device, A5XX_GPMU_GPMU_PWR_THRESHOLD,
+		0x80000000 | lm_limit(adreno_dev));
+
+	kgsl_regwrite(device, A5XX_GPMU_BEC_ENABLE, 0x10001FFF);
+	kgsl_regwrite(device, A5XX_GDPM_CONFIG1, 0x00201FF1);
+
+	/* Send an initial message to the GPMU with the LM voltage table */
+	kgsl_regwrite(device, AGC_MSG_STATE, 1);
+	kgsl_regwrite(device, AGC_MSG_COMMAND, AGC_POWER_CONFIG_PRODUCTION_ID);
+	length = _write_voltage_table(adreno_dev, AGC_MSG_PAYLOAD);
+	kgsl_regwrite(device, AGC_MSG_PAYLOAD_SIZE, length * sizeof(uint32_t));
+	kgsl_regwrite(device, AGC_INIT_MSG_MAGIC, AGC_INIT_MSG_VALUE);
+}
+
+/*
+ * a530_lm_enable() - Enable the LM/DPM feature on the GPMU
+ * @adreno_dev: The adreno device pointer
+ */
+static void a530_lm_enable(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_LM) ||
+		!test_bit(ADRENO_LM_CTRL, &adreno_dev->pwrctrl_flag))
+		return;
+
+	/* If no sequence properly initialized, return */
+	if (adreno_dev->lm_sequence == NULL)
+		return;
+
+	kgsl_regwrite(device, A5XX_GDPM_INT_MASK, 0x00000000);
+	kgsl_regwrite(device, A5XX_GDPM_INT_EN, 0x0000000A);
+	kgsl_regwrite(device, A5XX_GPMU_GPMU_VOLTAGE_INTR_EN_MASK, 0x00000001);
+	kgsl_regwrite(device, A5XX_GPMU_TEMP_THRESHOLD_INTR_EN_MASK,
+			0x00050000);
+	kgsl_regwrite(device, A5XX_GPMU_THROTTLE_UNMASK_FORCE_CTRL,
+			0x00030000);
+
+	if (adreno_is_a530(adreno_dev))
+		/* Program throttle control, do not enable idle DCS on v3+ */
+		kgsl_regwrite(device, A5XX_GPMU_CLOCK_THROTTLE_CTRL,
+			adreno_is_a530v2(adreno_dev) ? 0x00060011 : 0x00000011);
+}
+
+static void a540_lm_init(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	uint32_t agc_lm_config = AGC_BCL_DISABLED |
+		((ADRENO_CHIPID_PATCH(adreno_dev->chipid) & 0x3)
+		<< AGC_GPU_VERSION_SHIFT);
+	unsigned int r;
+
+	if (!test_bit(ADRENO_THROTTLING_CTRL, &adreno_dev->pwrctrl_flag))
+		agc_lm_config |= AGC_THROTTLE_DISABLE;
+
+	if (lm_on(adreno_dev)) {
+		agc_lm_config |=
+			AGC_LM_CONFIG_ENABLE_GPMU_ADAPTIVE |
+			AGC_LM_CONFIG_ISENSE_ENABLE;
+
+		kgsl_regread(device, A5XX_GPMU_TEMP_SENSOR_CONFIG, &r);
+
+		if ((r & GPMU_ISENSE_STATUS) == GPMU_ISENSE_END_POINT_CAL_ERR) {
+			KGSL_CORE_ERR(
+				"GPMU: ISENSE end point calibration failure\n");
+			agc_lm_config |= AGC_LM_CONFIG_ENABLE_ERROR;
+		}
+	}
+
+	kgsl_regwrite(device, AGC_MSG_STATE, 0x80000001);
+	kgsl_regwrite(device, AGC_MSG_COMMAND, AGC_POWER_CONFIG_PRODUCTION_ID);
+	(void) _write_voltage_table(adreno_dev, AGC_MSG_PAYLOAD);
+	kgsl_regwrite(device, AGC_MSG_PAYLOAD + AGC_LM_CONFIG, agc_lm_config);
+	kgsl_regwrite(device, AGC_MSG_PAYLOAD + AGC_LEVEL_CONFIG,
+		(unsigned int) ~(GENMASK(LM_DCVS_LIMIT, 0) |
+				GENMASK(16+LM_DCVS_LIMIT, 16)));
+
+	kgsl_regwrite(device, AGC_MSG_PAYLOAD_SIZE,
+		(AGC_LEVEL_CONFIG + 1) * sizeof(uint32_t));
+	kgsl_regwrite(device, AGC_INIT_MSG_MAGIC, AGC_INIT_MSG_VALUE);
+
+	kgsl_regwrite(device, A5XX_GPMU_GPMU_VOLTAGE,
+		(0x80000000 | device->pwrctrl.active_pwrlevel));
+
+	kgsl_regwrite(device, A5XX_GPMU_GPMU_PWR_THRESHOLD,
+		PWR_THRESHOLD_VALID | lm_limit(adreno_dev));
+
+	kgsl_regwrite(device, A5XX_GPMU_GPMU_VOLTAGE_INTR_EN_MASK,
+		VOLTAGE_INTR_EN);
+}
+
+static void a5xx_lm_enable(struct adreno_device *adreno_dev)
+{
+	if (adreno_is_a530(adreno_dev))
+		a530_lm_enable(adreno_dev);
+}
+
+static void a5xx_lm_init(struct adreno_device *adreno_dev)
+{
+	if (adreno_is_a530(adreno_dev))
+		a530_lm_init(adreno_dev);
+	else if (adreno_is_a540(adreno_dev))
+		a540_lm_init(adreno_dev);
+}
+
+static int gpmu_set_level(struct adreno_device *adreno_dev, unsigned int val)
+{
+	unsigned int reg;
+	int retry = 100;
+
+	kgsl_regwrite(KGSL_DEVICE(adreno_dev), A5XX_GPMU_GPMU_VOLTAGE, val);
+
+	do {
+		kgsl_regread(KGSL_DEVICE(adreno_dev), A5XX_GPMU_GPMU_VOLTAGE,
+			&reg);
+	} while ((reg & 0x80000000) && retry--);
+
+	return (reg & 0x80000000) ? -ETIMEDOUT : 0;
+}
+
+/*
+ * a5xx_pwrlevel_change_settings() - Program the hardware during power level
+ * transitions
+ * @adreno_dev: The adreno device pointer
+ * @prelevel: The previous power level
+ * @postlevel: The new power level
+ * @post: True if called after the clock change has taken effect
+ */
+static void a5xx_pwrlevel_change_settings(struct adreno_device *adreno_dev,
+				unsigned int prelevel, unsigned int postlevel,
+				bool post)
+{
+	int on = 0;
+
+	/*
+	 * On pre-A540 HW, only call through if PPD or LM
+	 * is supported and enabled
+	 */
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_PPD) &&
+		test_bit(ADRENO_PPD_CTRL, &adreno_dev->pwrctrl_flag))
+		on = ADRENO_PPD;
+
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_LM) &&
+		test_bit(ADRENO_LM_CTRL, &adreno_dev->pwrctrl_flag))
+		on = ADRENO_LM;
+
+	/* On 540+ HW call through unconditionally as long as GPMU is enabled */
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_GPMU)) {
+		if (adreno_is_a540(adreno_dev))
+			on = ADRENO_GPMU;
+	}
+
+	if (!on)
+		return;
+
+	if (post == 0) {
+		if (gpmu_set_level(adreno_dev, (0x80000010 | postlevel)))
+			KGSL_CORE_ERR(
+				"GPMU pre powerlevel did not stabilize\n");
+	} else {
+		if (gpmu_set_level(adreno_dev, (0x80000000 | postlevel)))
+			KGSL_CORE_ERR(
+				"GPMU post powerlevel did not stabilize\n");
+	}
+}
+
+static void a5xx_clk_set_options(struct adreno_device *adreno_dev,
+	const char *name, struct clk *clk)
+{
+	if (adreno_is_a540(adreno_dev)) {
+		if (!strcmp(name, "mem_iface_clk")) {
+			clk_set_flags(clk, CLKFLAG_NORETAIN_PERIPH);
+			clk_set_flags(clk, CLKFLAG_NORETAIN_MEM);
+		} else if (!strcmp(name, "core_clk")) {
+			clk_set_flags(clk, CLKFLAG_RETAIN_PERIPH);
+			clk_set_flags(clk, CLKFLAG_RETAIN_MEM);
+		}
+	}
+}
+
+static void a5xx_count_throttles(struct adreno_device *adreno_dev,
+		uint64_t adj)
+{
+	if (adreno_is_a530(adreno_dev))
+		kgsl_regread(KGSL_DEVICE(adreno_dev),
+				adreno_dev->lm_threshold_count,
+				&adreno_dev->lm_threshold_cross);
+	else if (adreno_is_a540(adreno_dev))
+		adreno_dev->lm_threshold_cross = adj;
+}
+
+static int a5xx_enable_pwr_counters(struct adreno_device *adreno_dev,
+		unsigned int counter)
+{
+	/*
+	 * On 5XX we have to emulate the PWR counters which are physically
+	 * missing. Program countable 6 on RBBM_PERFCTR_RBBM_0 as a substitute
+	 * for PWR:1. Don't emulate PWR:0 as nobody uses it and we don't want
+	 * to take away too many of the generic RBBM counters.
+	 */
+
+	if (counter == 0)
+		return -EINVAL;
+
+	kgsl_regwrite(KGSL_DEVICE(adreno_dev), A5XX_RBBM_PERFCTR_RBBM_SEL_0, 6);
+
+	return 0;
+}
+
+/* FW driven idle 10% throttle */
+#define IDLE_10PCT 0
+/* number of cycles when clock is throttled by 50% (CRC) */
+#define CRC_50PCT  1
+/* number of cycles when clock is throttled by more than 50% (CRC) */
+#define CRC_MORE50PCT 2
+/* number of cycles when clock is throttled by less than 50% (CRC) */
+#define CRC_LESS50PCT 3
+
+static uint64_t a5xx_read_throttling_counters(struct adreno_device *adreno_dev)
+{
+	int i, adj;
+	uint32_t th[ADRENO_GPMU_THROTTLE_COUNTERS];
+	struct adreno_busy_data *busy = &adreno_dev->busy_data;
+
+	if (!adreno_is_a540(adreno_dev))
+		return 0;
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_GPMU))
+		return 0;
+
+	if (!test_bit(ADRENO_THROTTLING_CTRL, &adreno_dev->pwrctrl_flag))
+		return 0;
+
+	for (i = 0; i < ADRENO_GPMU_THROTTLE_COUNTERS; i++) {
+		if (!adreno_dev->gpmu_throttle_counters[i])
+			return 0;
+
+		th[i] = counter_delta(KGSL_DEVICE(adreno_dev),
+				adreno_dev->gpmu_throttle_counters[i],
+				&busy->throttle_cycles[i]);
+	}
+	adj = th[CRC_MORE50PCT] - th[IDLE_10PCT];
+	adj = th[CRC_50PCT] + th[CRC_LESS50PCT] / 3 + (adj < 0 ? 0 : adj) * 3;
+
+	trace_kgsl_clock_throttling(
+		th[IDLE_10PCT], th[CRC_50PCT],
+		th[CRC_MORE50PCT], th[CRC_LESS50PCT],
+		adj);
+	return adj;
+}
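
The adjustment above weights heavily-throttled cycles (beyond the firmware's idle throttle) three times and lightly-throttled cycles one third; a quick numeric example with invented counter deltas may help.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* Invented per-sample counter deltas, in GPU cycles */
	uint32_t idle_10pct = 1000, crc_50pct = 4000;
	uint32_t crc_more50pct = 2500, crc_less50pct = 900;
	int adj;

	/* same weighting as a5xx_read_throttling_counters() */
	adj = crc_more50pct - idle_10pct;
	adj = crc_50pct + crc_less50pct / 3 + (adj < 0 ? 0 : adj) * 3;

	/* 4000 + 300 + 4500 = 8800 adjusted cycles */
	printf("adjustment = %d cycles\n", adj);
	return 0;
}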
+
+static void a5xx_enable_64bit(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	kgsl_regwrite(device, A5XX_CP_ADDR_MODE_CNTL, 0x1);
+	kgsl_regwrite(device, A5XX_VSC_ADDR_MODE_CNTL, 0x1);
+	kgsl_regwrite(device, A5XX_GRAS_ADDR_MODE_CNTL, 0x1);
+	kgsl_regwrite(device, A5XX_RB_ADDR_MODE_CNTL, 0x1);
+	kgsl_regwrite(device, A5XX_PC_ADDR_MODE_CNTL, 0x1);
+	kgsl_regwrite(device, A5XX_HLSQ_ADDR_MODE_CNTL, 0x1);
+	kgsl_regwrite(device, A5XX_VFD_ADDR_MODE_CNTL, 0x1);
+	kgsl_regwrite(device, A5XX_VPC_ADDR_MODE_CNTL, 0x1);
+	kgsl_regwrite(device, A5XX_UCHE_ADDR_MODE_CNTL, 0x1);
+	kgsl_regwrite(device, A5XX_SP_ADDR_MODE_CNTL, 0x1);
+	kgsl_regwrite(device, A5XX_TPL1_ADDR_MODE_CNTL, 0x1);
+	kgsl_regwrite(device, A5XX_RBBM_SECVID_TSB_ADDR_MODE_CNTL, 0x1);
+}
+
+/*
+ * a5xx_gpmu_reset() - Re-enable GPMU based power features and restart GPMU
+ * @work: Pointer to the work struct for gpmu reset
+ *
+ * Load the GPMU microcode, set up any features such as hardware clock gating
+ * or IFPC, and take the GPMU out of reset.
+ */
+static void a5xx_gpmu_reset(struct work_struct *work)
+{
+	struct adreno_device *adreno_dev = container_of(work,
+			struct adreno_device, gpmu_work);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (test_bit(ADRENO_DEVICE_GPMU_INITIALIZED, &adreno_dev->priv))
+		return;
+
+	/*
+	 * If GPMU has already experienced a restart or is in the process of it
+	 * after the watchdog timeout, then there is no need to reset GPMU
+	 * again.
+	 */
+	if (device->state != KGSL_STATE_NAP &&
+		device->state != KGSL_STATE_AWARE &&
+		device->state != KGSL_STATE_ACTIVE)
+		return;
+
+	mutex_lock(&device->mutex);
+
+	if (device->state == KGSL_STATE_NAP)
+		kgsl_pwrctrl_change_state(device, KGSL_STATE_AWARE);
+
+	if (a5xx_regulator_enable(adreno_dev))
+		goto out;
+
+	/* Soft reset of the GPMU block */
+	kgsl_regwrite(device, A5XX_RBBM_BLOCK_SW_RESET_CMD, BIT(16));
+
+	/* GPU comes up in secured mode, make it unsecured by default */
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_CONTENT_PROTECTION))
+		kgsl_regwrite(device, A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
+
+	a5xx_gpmu_init(adreno_dev);
+
+out:
+	mutex_unlock(&device->mutex);
+}
+
+static void _setup_throttling_counters(struct adreno_device *adreno_dev)
+{
+	int i, ret;
+
+	if (!adreno_is_a540(adreno_dev))
+		return;
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_GPMU))
+		return;
+
+	for (i = 0; i < ADRENO_GPMU_THROTTLE_COUNTERS; i++) {
+		/* reset throttled cycles value */
+		adreno_dev->busy_data.throttle_cycles[i] = 0;
+
+		if (adreno_dev->gpmu_throttle_counters[i] != 0)
+			continue;
+		ret = adreno_perfcounter_get(adreno_dev,
+			KGSL_PERFCOUNTER_GROUP_GPMU_PWR,
+			ADRENO_GPMU_THROTTLE_COUNTERS_BASE_REG + i,
+			&adreno_dev->gpmu_throttle_counters[i],
+			NULL,
+			PERFCOUNTER_FLAG_KERNEL);
+		WARN_ONCE(ret,  "Unable to get clock throttling counter %x\n",
+			ADRENO_GPMU_THROTTLE_COUNTERS_BASE_REG + i);
+	}
+}
+
+/*
+ * a5xx_start() - Device start
+ * @adreno_dev: Pointer to adreno device
+ *
+ * a5xx device start
+ */
+static void a5xx_start(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	unsigned int bit;
+	int ret;
+
+	if (adreno_is_a530(adreno_dev) && ADRENO_FEATURE(adreno_dev, ADRENO_LM)
+			&& adreno_dev->lm_threshold_count == 0) {
+
+		ret = adreno_perfcounter_get(adreno_dev,
+			KGSL_PERFCOUNTER_GROUP_GPMU_PWR, 27,
+			&adreno_dev->lm_threshold_count, NULL,
+			PERFCOUNTER_FLAG_KERNEL);
+		/* Ignore noncritical ret - used for debugfs */
+		if (ret)
+			adreno_dev->lm_threshold_count = 0;
+	}
+
+	_setup_throttling_counters(adreno_dev);
+
+	adreno_vbif_start(adreno_dev, a5xx_vbif_platforms,
+			ARRAY_SIZE(a5xx_vbif_platforms));
+
+	/* Make all blocks contribute to the GPU BUSY perf counter */
+	kgsl_regwrite(device, A5XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xFFFFFFFF);
+
+	/*
+	 * Enable the RBBM error reporting bits.  This lets us get
+	 * useful information on failure
+	 */
+	kgsl_regwrite(device, A5XX_RBBM_AHB_CNTL0, 0x00000001);
+
+	if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_FAULT_DETECT_MASK)) {
+		/*
+		 * We have 4 RB units, and only RB0 activity signals are
+		 * working correctly. Mask out RB1-3 activity signals
+		 * from the HW hang detection logic as per
+		 * recommendation of hardware team.
+		 */
+		kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL11,
+				0xF0000000);
+		kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL12,
+				0xFFFFFFFF);
+		kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL13,
+				0xFFFFFFFF);
+		kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL14,
+				0xFFFFFFFF);
+		kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL15,
+				0xFFFFFFFF);
+		kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL16,
+				0xFFFFFFFF);
+		kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL17,
+				0xFFFFFFFF);
+		kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_MASK_CNTL18,
+				0xFFFFFFFF);
+	}
+
+	/*
+	 * Turn on hang detection for a530 v2 and beyond. This spews a
+	 * lot of useful information into the RBBM registers on a hang.
+	 */
+	if (!adreno_is_a530v1(adreno_dev)) {
+
+		set_bit(ADRENO_DEVICE_HANG_INTR, &adreno_dev->priv);
+		gpudev->irq->mask |= (1 << A5XX_INT_MISC_HANG_DETECT);
+		/*
+		 * Set hang detection threshold to 4 million cycles
+		 * (0x3FFFF*16)
+		 */
+		kgsl_regwrite(device, A5XX_RBBM_INTERFACE_HANG_INT_CNTL,
+					  (1 << 30) | 0x3FFFF);
+	}
+
+	/* Turn on performance counters */
+	kgsl_regwrite(device, A5XX_RBBM_PERFCTR_CNTL, 0x01);
+
+	/*
+	 * This is to increase performance by restricting VFD's cache access,
+	 * so that LRZ and other data get evicted less.
+	 */
+	kgsl_regwrite(device, A5XX_UCHE_CACHE_WAYS, 0x02);
+
+	/*
+	 * Set UCHE_WRITE_THRU_BASE to the UCHE_TRAP_BASE effectively
+	 * disabling L2 bypass
+	 */
+	kgsl_regwrite(device, A5XX_UCHE_TRAP_BASE_LO, 0xffff0000);
+	kgsl_regwrite(device, A5XX_UCHE_TRAP_BASE_HI, 0x0001ffff);
+	kgsl_regwrite(device, A5XX_UCHE_WRITE_THRU_BASE_LO, 0xffff0000);
+	kgsl_regwrite(device, A5XX_UCHE_WRITE_THRU_BASE_HI, 0x0001ffff);
+
+	/* Program the GMEM VA range for the UCHE path */
+	kgsl_regwrite(device, A5XX_UCHE_GMEM_RANGE_MIN_LO,
+				ADRENO_UCHE_GMEM_BASE);
+	kgsl_regwrite(device, A5XX_UCHE_GMEM_RANGE_MIN_HI, 0x0);
+	kgsl_regwrite(device, A5XX_UCHE_GMEM_RANGE_MAX_LO,
+				ADRENO_UCHE_GMEM_BASE +
+				adreno_dev->gmem_size - 1);
+	kgsl_regwrite(device, A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x0);
+
+	/*
+	 * The CP registers below are 0x0 by default; program init
+	 * values based on the a5xx flavor.
+	 */
+	if (adreno_is_a505_or_a506(adreno_dev)) {
+		kgsl_regwrite(device, A5XX_CP_MEQ_THRESHOLDS, 0x20);
+		kgsl_regwrite(device, A5XX_CP_MERCIU_SIZE, 0x400);
+		kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030);
+		kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A);
+	} else if (adreno_is_a510(adreno_dev)) {
+		kgsl_regwrite(device, A5XX_CP_MEQ_THRESHOLDS, 0x20);
+		kgsl_regwrite(device, A5XX_CP_MERCIU_SIZE, 0x20);
+		kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030);
+		kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A);
+	} else {
+		kgsl_regwrite(device, A5XX_CP_MEQ_THRESHOLDS, 0x40);
+		kgsl_regwrite(device, A5XX_CP_MERCIU_SIZE, 0x40);
+		kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
+		kgsl_regwrite(device, A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
+	}
+
+	/*
+	 * The vtxFifo and primFifo threshold default values
+	 * differ between a5xx flavors.
+	 */
+	if (adreno_is_a505_or_a506(adreno_dev))
+		kgsl_regwrite(device, A5XX_PC_DBG_ECO_CNTL,
+						(0x100 << 11 | 0x100 << 22));
+	else if (adreno_is_a510(adreno_dev) || adreno_is_a512(adreno_dev))
+		kgsl_regwrite(device, A5XX_PC_DBG_ECO_CNTL,
+						(0x200 << 11 | 0x200 << 22));
+	else
+		kgsl_regwrite(device, A5XX_PC_DBG_ECO_CNTL,
+						(0x400 << 11 | 0x300 << 22));
+
+	/*
+	 * A5x USP LDST: a non-valid pixel can wrongly update the read combine
+	 * offset. A5xx added a read combine optimization, and on a530v1 there
+	 * are cases where no pixel is valid but the active mask is not
+	 * cleared, so the offset can be wrongly updated if the invalid
+	 * address happens to be combinable. The wrongly latched value shifts
+	 * the returned data to the wrong offset. Work around this by
+	 * disabling LD combine: set bit[25] of SP_DBG_ECO_CNTL (SP chicken
+	 * bit[17]) to 1; the default is 0 (combine enabled).
+	 */
+	if (adreno_is_a530v1(adreno_dev))
+		kgsl_regrmw(device, A5XX_SP_DBG_ECO_CNTL, 0, (1 << 25));
+
+	if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_TWO_PASS_USE_WFI)) {
+		/*
+		 * Set TWOPASSUSEWFI in A5XX_PC_DBG_ECO_CNTL for
+		 * microcodes after v77
+		 */
+		if ((adreno_compare_pfp_version(adreno_dev, 0x5FF077) >= 0))
+			kgsl_regrmw(device, A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
+	}
+
+	if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_DISABLE_RB_DP2CLOCKGATING)) {
+		/*
+		 * Disable RB sampler datapath DP2 clock gating
+		 * optimization for 1-SP GPUs; it is enabled by default.
+		 */
+		kgsl_regrmw(device, A5XX_RB_DBG_ECO_CNT, 0, (1 << 9));
+	}
+	/*
+	 * Disable UCHE global filter as SP can invalidate/flush
+	 * independently
+	 */
+	kgsl_regwrite(device, A5XX_UCHE_MODE_CNTL, BIT(29));
+	/* Set the USE_RETENTION_FLOPS chicken bit */
+	kgsl_regwrite(device, A5XX_CP_CHICKEN_DBG, 0x02000000);
+
+	/* Enable ISDB mode if requested */
+	if (test_bit(ADRENO_DEVICE_ISDB_ENABLED, &adreno_dev->priv)) {
+		if (!kgsl_active_count_get(device)) {
+			/*
+			 * Disable ME/PFP split timeouts when the debugger is
+			 * enabled because the CP doesn't know when a shader is
+			 * in active debug
+			 */
+			kgsl_regwrite(device, A5XX_RBBM_AHB_CNTL1, 0x06FFFFFF);
+
+			/* Force the SP0/SP1 clocks on to enable ISDB */
+			kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL_SP0, 0x0);
+			kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL_SP1, 0x0);
+			kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL_SP2, 0x0);
+			kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL_SP3, 0x0);
+			kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL2_SP0, 0x0);
+			kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL2_SP1, 0x0);
+			kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL2_SP2, 0x0);
+			kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL2_SP3, 0x0);
+
+			/* disable HWCG */
+			kgsl_regwrite(device, A5XX_RBBM_CLOCK_CNTL, 0x0);
+			kgsl_regwrite(device, A5XX_RBBM_ISDB_CNT, 0x0);
+		} else
+			KGSL_CORE_ERR(
+				"Active count failed while turning on ISDB.");
+	} else {
+		/* if not in ISDB mode enable ME/PFP split notification */
+		kgsl_regwrite(device, A5XX_RBBM_AHB_CNTL1, 0xA6FFFFFF);
+	}
+
+	kgsl_regwrite(device, A5XX_RBBM_AHB_CNTL2, 0x0000003F);
+
+	if (!of_property_read_u32(device->pdev->dev.of_node,
+		"qcom,highest-bank-bit", &bit)) {
+		if (bit >= 13 && bit <= 16) {
+			bit = (bit - 13) & 0x03;
+
+			/*
+			 * Program the highest DDR bank bit that was passed in
+			 * from the DT in a handful of registers. Some of these
+			 * registers will also be written by the UMD, but we
+			 * want to program them in case we happen to use the
+			 * UCHE before the UMD does
+			 */
+
+			kgsl_regwrite(device, A5XX_TPL1_MODE_CNTL, bit << 7);
+			kgsl_regwrite(device, A5XX_RB_MODE_CNTL, bit << 1);
+			if (adreno_is_a540(adreno_dev) ||
+				adreno_is_a512(adreno_dev))
+				kgsl_regwrite(device, A5XX_UCHE_DBG_ECO_CNTL_2,
+					bit);
+		}
+
+	}
+
+	/*
+	 * VPC corner case with local memory load kill leads to corrupt
+	 * internal state. Normal Disable does not work for all a5x chips.
+	 * So do the following setting to disable it.
+	 */
+	if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_DISABLE_LMLOADKILL)) {
+		kgsl_regrmw(device, A5XX_VPC_DBG_ECO_CNTL, 0, 0x1 << 23);
+		kgsl_regrmw(device, A5XX_HLSQ_DBG_ECO_CNTL, 0x1 << 18, 0);
+	}
+
+	a5xx_preemption_start(adreno_dev);
+	a5xx_protect_init(adreno_dev);
+}
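
Relating to the qcom,highest-bank-bit handling near the end of a5xx_start() above, this stand-alone sketch shows how DT values 13..16 map to the 2-bit field and where that field lands in the TPL1 and RB mode-control writes (register names are echoed only for reference).

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t bit;

	/* qcom,highest-bank-bit from the DT is expected to be 13..16 */
	for (bit = 13; bit <= 16; bit++) {
		uint32_t field = (bit - 13) & 0x03;

		/* same placement a5xx_start() uses for the two registers */
		printf("bank bit %u -> TPL1_MODE_CNTL=0x%x RB_MODE_CNTL=0x%x\n",
		       (unsigned int)bit, (unsigned int)(field << 7),
		       (unsigned int)(field << 1));
	}
	return 0;
}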
+
+/*
+ * Follow the ME_INIT sequence with a preemption yield to allow the GPU to move
+ * to a different ringbuffer, if desired
+ */
+static int _preemption_init(
+			struct adreno_device *adreno_dev,
+			struct adreno_ringbuffer *rb, unsigned int *cmds,
+			struct kgsl_context *context)
+{
+	unsigned int *cmds_orig = cmds;
+	uint64_t gpuaddr = rb->preemption_desc.gpuaddr;
+
+	/* Turn CP protection OFF */
+	*cmds++ = cp_type7_packet(CP_SET_PROTECTED_MODE, 1);
+	*cmds++ = 0;
+	/*
+	 * CP during context switch will save context switch info to
+	 * a5xx_cp_preemption_record pointed by CONTEXT_SWITCH_SAVE_ADDR
+	 */
+	*cmds++ = cp_type4_packet(A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 1);
+	*cmds++ = lower_32_bits(gpuaddr);
+	*cmds++ = cp_type4_packet(A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_HI, 1);
+	*cmds++ = upper_32_bits(gpuaddr);
+
+	/* Turn CP protection ON */
+	*cmds++ = cp_type7_packet(CP_SET_PROTECTED_MODE, 1);
+	*cmds++ = 1;
+
+	*cmds++ = cp_type7_packet(CP_PREEMPT_ENABLE_GLOBAL, 1);
+	*cmds++ = 0;
+
+	*cmds++ = cp_type7_packet(CP_PREEMPT_ENABLE_LOCAL, 1);
+	*cmds++ = 1;
+
+	/* Enable yield in RB only */
+	*cmds++ = cp_type7_packet(CP_YIELD_ENABLE, 1);
+	*cmds++ = 1;
+
+	*cmds++ = cp_type7_packet(CP_CONTEXT_SWITCH_YIELD, 4);
+	cmds += cp_gpuaddr(adreno_dev, cmds, 0x0);
+	*cmds++ = 0;
+	/* generate interrupt on preemption completion */
+	*cmds++ = 1;
+
+	return cmds - cmds_orig;
+}
+
+static int a5xx_post_start(struct adreno_device *adreno_dev)
+{
+	int ret;
+	unsigned int *cmds, *start;
+	struct adreno_ringbuffer *rb = adreno_dev->cur_rb;
+
+	if (!adreno_is_a530(adreno_dev) &&
+		!adreno_is_preemption_enabled(adreno_dev))
+		return 0;
+
+	cmds = adreno_ringbuffer_allocspace(rb, 42);
+	if (IS_ERR(cmds)) {
+		struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+		KGSL_DRV_ERR(device, "error allocating preemption init cmds");
+		return PTR_ERR(cmds);
+	}
+	start = cmds;
+
+	/*
+	 * Send a pipeline stat event whenever the GPU gets powered up
+	 * to cause misbehaving perf counters to start ticking
+	 */
+	if (adreno_is_a530(adreno_dev)) {
+		*cmds++ = cp_packet(adreno_dev, CP_EVENT_WRITE, 1);
+		*cmds++ = 0xF;
+	}
+
+	if (adreno_is_preemption_enabled(adreno_dev))
+		cmds += _preemption_init(adreno_dev, rb, cmds, NULL);
+
+	rb->_wptr = rb->_wptr - (42 - (cmds - start));
+
+	ret = adreno_ringbuffer_submit_spin(rb, NULL, 2000);
+	if (ret)
+		spin_idle_debug(KGSL_DEVICE(adreno_dev),
+				"hw initialization failed to idle\n");
+
+	return ret;
+}
+
+static int a5xx_gpmu_init(struct adreno_device *adreno_dev)
+{
+	int ret;
+
+	/* Set up LM before initializing the GPMU */
+	a5xx_lm_init(adreno_dev);
+
+	/* Enable SPTP based power collapse before enabling GPMU */
+	a5xx_enable_pc(adreno_dev);
+
+	ret = a5xx_gpmu_start(adreno_dev);
+	if (ret)
+		return ret;
+
+	/* Enable limits management */
+	a5xx_lm_enable(adreno_dev);
+	return 0;
+}
+
+static int a5xx_switch_to_unsecure_mode(struct adreno_device *adreno_dev,
+				struct adreno_ringbuffer *rb)
+{
+	unsigned int *cmds;
+	int ret;
+
+	cmds = adreno_ringbuffer_allocspace(rb, 2);
+	if (IS_ERR(cmds))
+		return PTR_ERR(cmds);
+	if (cmds == NULL)
+		return -ENOSPC;
+
+	cmds += cp_secure_mode(adreno_dev, cmds, 0);
+
+	ret = adreno_ringbuffer_submit_spin(rb, NULL, 2000);
+	if (ret)
+		spin_idle_debug(KGSL_DEVICE(adreno_dev),
+				"Switch to unsecure failed to idle\n");
+
+	return ret;
+}
+
+/*
+ * a5xx_microcode_load() - Load microcode
+ * @adreno_dev: Pointer to adreno device
+ */
+static int a5xx_microcode_load(struct adreno_device *adreno_dev)
+{
+	void *ptr;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	uint64_t gpuaddr;
+
+	gpuaddr = adreno_dev->pm4.gpuaddr;
+	kgsl_regwrite(device, A5XX_CP_PM4_INSTR_BASE_LO,
+				lower_32_bits(gpuaddr));
+	kgsl_regwrite(device, A5XX_CP_PM4_INSTR_BASE_HI,
+				upper_32_bits(gpuaddr));
+
+	gpuaddr = adreno_dev->pfp.gpuaddr;
+	kgsl_regwrite(device, A5XX_CP_PFP_INSTR_BASE_LO,
+				lower_32_bits(gpuaddr));
+	kgsl_regwrite(device, A5XX_CP_PFP_INSTR_BASE_HI,
+				upper_32_bits(gpuaddr));
+
+	/*
+	 * Make a SCM resume call to write the zap shader base address into
+	 * the appropriate register; skip it if retention is supported for
+	 * the CPZ register.
+	 */
+	if (zap_ucode_loaded && !(ADRENO_FEATURE(adreno_dev,
+		ADRENO_CPZ_RETENTION))) {
+		int ret;
+		struct scm_desc desc = {0};
+
+		desc.args[0] = 0;
+		desc.args[1] = 13;
+		desc.arginfo = SCM_ARGS(2);
+
+		ret = scm_call2(SCM_SIP_FNID(SCM_SVC_BOOT, 0xA), &desc);
+		if (ret) {
+			pr_err("SCM resume call failed with error %d\n", ret);
+			return ret;
+		}
+
+	}
+
+	/* Load the zap shader firmware through PIL if it's available */
+	if (adreno_dev->gpucore->zap_name && !zap_ucode_loaded) {
+		ptr = subsystem_get(adreno_dev->gpucore->zap_name);
+
+		/* Return error if the zap shader cannot be loaded */
+		if (IS_ERR_OR_NULL(ptr))
+			return (ptr == NULL) ? -ENODEV : PTR_ERR(ptr);
+
+		zap_ucode_loaded = 1;
+	}
+
+	return 0;
+}
+
+static int _me_init_ucode_workarounds(struct adreno_device *adreno_dev)
+{
+	switch (ADRENO_GPUREV(adreno_dev)) {
+	case ADRENO_REV_A510:
+		return 0x00000001; /* Ucode workaround for token end syncs */
+	case ADRENO_REV_A505:
+	case ADRENO_REV_A506:
+	case ADRENO_REV_A530:
+		/*
+		 * Ucode workarounds for token end syncs,
+		 * WFI after every direct-render 3D mode draw and
+		 * WFI after every 2D Mode 3 draw.
+		 */
+		return 0x0000000B;
+	case ADRENO_REV_A540:
+		/*
+		 * WFI after every direct-render 3D mode draw and
+		 * WFI after every 2D Mode 3 draw. This is needed
+		 * only on a540v1.
+		 */
+		if (adreno_is_a540v1(adreno_dev))
+			return 0x0000000A;
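+		/* fall through */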
+	default:
+		return 0x00000000; /* No ucode workarounds enabled */
+	}
+}
+
+/*
+ * CP_INIT_MAX_CONTEXT bit tells whether multiple hardware contexts can
+ * be used at once or whether they must be serialized
+ */
+#define CP_INIT_MAX_CONTEXT BIT(0)
+
+/* Enables register protection mode */
+#define CP_INIT_ERROR_DETECTION_CONTROL BIT(1)
+
+/* Header dump information */
+#define CP_INIT_HEADER_DUMP BIT(2) /* Reserved */
+
+/* Default Reset states enabled for PFP and ME */
+#define CP_INIT_DEFAULT_RESET_STATE BIT(3)
+
+/* Drawcall filter range */
+#define CP_INIT_DRAWCALL_FILTER_RANGE BIT(4)
+
+/* Ucode workaround masks */
+#define CP_INIT_UCODE_WORKAROUND_MASK BIT(5)
+
+#define CP_INIT_MASK (CP_INIT_MAX_CONTEXT | \
+		CP_INIT_ERROR_DETECTION_CONTROL | \
+		CP_INIT_HEADER_DUMP | \
+		CP_INIT_DEFAULT_RESET_STATE | \
+		CP_INIT_UCODE_WORKAROUND_MASK)
+
+static void _set_ordinals(struct adreno_device *adreno_dev,
+		unsigned int *cmds, unsigned int count)
+{
+	unsigned int *start = cmds;
+
+	/* Enabled ordinal mask */
+	*cmds++ = CP_INIT_MASK;
+
+	if (CP_INIT_MASK & CP_INIT_MAX_CONTEXT) {
+		/*
+		 * Multiple HW ctxs are unreliable on a530v1,
+		 * use single hw context.
+		 * Use multiple contexts if bit set, otherwise serialize:
+		 *      3D (bit 0) 2D (bit 1)
+		 */
+		if (adreno_is_a530v1(adreno_dev))
+			*cmds++ = 0x00000000;
+		else
+			*cmds++ = 0x00000003;
+	}
+
+	if (CP_INIT_MASK & CP_INIT_ERROR_DETECTION_CONTROL)
+		*cmds++ = 0x20000000;
+
+	if (CP_INIT_MASK & CP_INIT_HEADER_DUMP) {
+		/* Header dump address */
+		*cmds++ = 0x00000000;
+		/* Header dump enable and dump size */
+		*cmds++ = 0x00000000;
+	}
+
+	if (CP_INIT_MASK & CP_INIT_DRAWCALL_FILTER_RANGE) {
+		/* Start range */
+		*cmds++ = 0x00000000;
+		/* End range (inclusive) */
+		*cmds++ = 0x00000000;
+	}
+
+	if (CP_INIT_MASK & CP_INIT_UCODE_WORKAROUND_MASK)
+		*cmds++ = _me_init_ucode_workarounds(adreno_dev);
+
+	/* Pad rest of the cmds with 0's */
+	while ((unsigned int)(cmds - start) < count)
+		*cmds++ = 0x0;
+}
+
+static int a5xx_critical_packet_submit(struct adreno_device *adreno_dev,
+					struct adreno_ringbuffer *rb)
+{
+	unsigned int *cmds;
+	int ret;
+
+	if (!critical_packet_constructed)
+		return 0;
+
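+	/*
+	 * Chain the pre-built critical packet buffer in as an indirect
+	 * buffer and spin until the CP goes idle.
+	 */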
+	cmds = adreno_ringbuffer_allocspace(rb, 4);
+	if (IS_ERR(cmds))
+		return PTR_ERR(cmds);
+	if (cmds == NULL)
+		return -ENOSPC;
+
+	*cmds++ = cp_mem_packet(adreno_dev, CP_INDIRECT_BUFFER_PFE, 2, 1);
+	cmds += cp_gpuaddr(adreno_dev, cmds, crit_pkts.gpuaddr);
+	*cmds++ = crit_pkts_dwords;
+
+	ret = adreno_ringbuffer_submit_spin(rb, NULL, 20);
+	if (ret)
+		spin_idle_debug(KGSL_DEVICE(adreno_dev),
+			"Critical packet submission failed to idle\n");
+
+	return ret;
+}
+
+/*
+ * a5xx_send_me_init() - Initialize ringbuffer
+ * @adreno_dev: Pointer to adreno device
+ * @rb: Pointer to the ringbuffer of device
+ *
+ * Submit the commands for ME initialization.
+ */
+static int a5xx_send_me_init(struct adreno_device *adreno_dev,
+			 struct adreno_ringbuffer *rb)
+{
+	unsigned int *cmds;
+	int ret;
+
+	cmds = adreno_ringbuffer_allocspace(rb, 9);
+	if (IS_ERR(cmds))
+		return PTR_ERR(cmds);
+	if (cmds == NULL)
+		return -ENOSPC;
+
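+	/* CP_ME_INIT carries 8 ordinal dwords, 9 dwords total with the header */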
+	*cmds++ = cp_type7_packet(CP_ME_INIT, 8);
+
+	_set_ordinals(adreno_dev, cmds, 8);
+
+	ret = adreno_ringbuffer_submit_spin(rb, NULL, 2000);
+	if (ret)
+		spin_idle_debug(KGSL_DEVICE(adreno_dev),
+				"CP initialization failed to idle\n");
+
+	return ret;
+}
+
+static int a5xx_set_unsecured_mode(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb)
+{
+	int ret = 0;
+
+	if (ADRENO_QUIRK(adreno_dev, ADRENO_QUIRK_CRITICAL_PACKETS)) {
+		ret = a5xx_critical_packet_submit(adreno_dev, rb);
+		if (ret)
+			return ret;
+	}
+
+	/* GPU comes up in secured mode, make it unsecured by default */
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_CONTENT_PROTECTION))
+		ret = a5xx_switch_to_unsecure_mode(adreno_dev, rb);
+	else
+		kgsl_regwrite(&adreno_dev->dev,
+				A5XX_RBBM_SECVID_TRUST_CNTL, 0x0);
+
+	return ret;
+}
+
+/*
+ * a5xx_rb_start() - Start the ringbuffer
+ * @adreno_dev: Pointer to adreno device
+ * @start_type: Warm or cold start
+ */
+static int a5xx_rb_start(struct adreno_device *adreno_dev,
+			 unsigned int start_type)
+{
+	struct adreno_ringbuffer *rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev);
+	struct kgsl_device *device = &adreno_dev->dev;
+	uint64_t addr;
+	int ret;
+
+	addr = SCRATCH_RPTR_GPU_ADDR(device, rb->id);
+
+	adreno_writereg64(adreno_dev, ADRENO_REG_CP_RB_RPTR_ADDR_LO,
+			ADRENO_REG_CP_RB_RPTR_ADDR_HI, addr);
+
+	/*
+	 * The size of the ringbuffer in the hardware is the log2
+	 * representation of the size in quadwords (sizedwords / 2).
+	 * Also disable the host RPTR shadow register as it might be unreliable
+	 * in certain circumstances.
+	 */
+
+	adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_CNTL,
+		A5XX_CP_RB_CNTL_DEFAULT);
+
+	adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_BASE,
+			rb->buffer_desc.gpuaddr);
+
+	ret = a5xx_microcode_load(adreno_dev);
+	if (ret)
+		return ret;
+
+	/* clear ME_HALT to start micro engine */
+	adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_CNTL, 0);
+
+	ret = a5xx_send_me_init(adreno_dev, rb);
+	if (ret)
+		return ret;
+
+	/* GPU comes up in secured mode, make it unsecured by default */
+	ret = a5xx_set_unsecured_mode(adreno_dev, rb);
+	if (ret)
+		return ret;
+
+	ret = a5xx_gpmu_init(adreno_dev);
+	if (ret)
+		return ret;
+
+	a5xx_post_start(adreno_dev);
+
+	return 0;
+}
+
+static int _load_firmware(struct kgsl_device *device, const char *fwfile,
+			  struct kgsl_memdesc *ucode, size_t *ucode_size,
+			  unsigned int *ucode_version)
+{
+	const struct firmware *fw = NULL;
+	int ret;
+
+	ret = request_firmware(&fw, fwfile, device->dev);
+
+	if (ret) {
+		KGSL_DRV_ERR(device, "request_firmware(%s) failed: %d\n",
+				fwfile, ret);
+		return ret;
+	}
+
+	ret = kgsl_allocate_global(device, ucode, fw->size - 4,
+				KGSL_MEMFLAGS_GPUREADONLY, 0, "ucode");
+
+	if (ret)
+		goto done;
+
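+	/*
+	 * Skip the 4 byte header at the start of the image; copy only the
+	 * payload and take the ucode version from its first dword.
+	 */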
+	memcpy(ucode->hostptr, &fw->data[4], fw->size - 4);
+	*ucode_size = (fw->size - 4) / sizeof(uint32_t);
+	*ucode_version = *(unsigned int *)&fw->data[4];
+
+done:
+	release_firmware(fw);
+
+	return ret;
+}
+
+/*
+ * a5xx_microcode_read() - Read microcode
+ * @adreno_dev: Pointer to adreno device
+ */
+static int a5xx_microcode_read(struct adreno_device *adreno_dev)
+{
+	int ret;
+
+	if (adreno_dev->pm4.hostptr == NULL) {
+		ret = _load_firmware(KGSL_DEVICE(adreno_dev),
+				 adreno_dev->gpucore->pm4fw_name,
+				 &adreno_dev->pm4,
+				 &adreno_dev->pm4_fw_size,
+				 &adreno_dev->pm4_fw_version);
+		if (ret)
+			return ret;
+	}
+
+	if (adreno_dev->pfp.hostptr == NULL) {
+		ret = _load_firmware(KGSL_DEVICE(adreno_dev),
+				 adreno_dev->gpucore->pfpfw_name,
+				 &adreno_dev->pfp,
+				 &adreno_dev->pfp_fw_size,
+				 &adreno_dev->pfp_fw_version);
+		if (ret)
+			return ret;
+	}
+
+	ret = _load_gpmu_firmware(adreno_dev);
+	if (ret)
+		return ret;
+
+	_load_regfile(adreno_dev);
+
+	return ret;
+}
+
+static struct adreno_perfcount_register a5xx_perfcounters_cp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_0_LO,
+		A5XX_RBBM_PERFCTR_CP_0_HI, 0, A5XX_CP_PERFCTR_CP_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_1_LO,
+		A5XX_RBBM_PERFCTR_CP_1_HI, 1, A5XX_CP_PERFCTR_CP_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_2_LO,
+		A5XX_RBBM_PERFCTR_CP_2_HI, 2, A5XX_CP_PERFCTR_CP_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_3_LO,
+		A5XX_RBBM_PERFCTR_CP_3_HI, 3, A5XX_CP_PERFCTR_CP_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_4_LO,
+		A5XX_RBBM_PERFCTR_CP_4_HI, 4, A5XX_CP_PERFCTR_CP_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_5_LO,
+		A5XX_RBBM_PERFCTR_CP_5_HI, 5, A5XX_CP_PERFCTR_CP_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_6_LO,
+		A5XX_RBBM_PERFCTR_CP_6_HI, 6, A5XX_CP_PERFCTR_CP_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CP_7_LO,
+		A5XX_RBBM_PERFCTR_CP_7_HI, 7, A5XX_CP_PERFCTR_CP_SEL_7 },
+};
+
+/*
+ * Note that PERFCTR_RBBM_0 is missing - it is used to emulate the PWR counters.
+ * See below.
+ */
+static struct adreno_perfcount_register a5xx_perfcounters_rbbm[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RBBM_1_LO,
+		A5XX_RBBM_PERFCTR_RBBM_1_HI, 9, A5XX_RBBM_PERFCTR_RBBM_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RBBM_2_LO,
+		A5XX_RBBM_PERFCTR_RBBM_2_HI, 10, A5XX_RBBM_PERFCTR_RBBM_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RBBM_3_LO,
+		A5XX_RBBM_PERFCTR_RBBM_3_HI, 11, A5XX_RBBM_PERFCTR_RBBM_SEL_3 },
+};
+
+static struct adreno_perfcount_register a5xx_perfcounters_pc[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_0_LO,
+		A5XX_RBBM_PERFCTR_PC_0_HI, 12, A5XX_PC_PERFCTR_PC_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_1_LO,
+		A5XX_RBBM_PERFCTR_PC_1_HI, 13, A5XX_PC_PERFCTR_PC_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_2_LO,
+		A5XX_RBBM_PERFCTR_PC_2_HI, 14, A5XX_PC_PERFCTR_PC_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_3_LO,
+		A5XX_RBBM_PERFCTR_PC_3_HI, 15, A5XX_PC_PERFCTR_PC_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_4_LO,
+		A5XX_RBBM_PERFCTR_PC_4_HI, 16, A5XX_PC_PERFCTR_PC_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_5_LO,
+		A5XX_RBBM_PERFCTR_PC_5_HI, 17, A5XX_PC_PERFCTR_PC_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_6_LO,
+		A5XX_RBBM_PERFCTR_PC_6_HI, 18, A5XX_PC_PERFCTR_PC_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_PC_7_LO,
+		A5XX_RBBM_PERFCTR_PC_7_HI, 19, A5XX_PC_PERFCTR_PC_SEL_7 },
+};
+
+static struct adreno_perfcount_register a5xx_perfcounters_vfd[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_0_LO,
+		A5XX_RBBM_PERFCTR_VFD_0_HI, 20, A5XX_VFD_PERFCTR_VFD_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_1_LO,
+		A5XX_RBBM_PERFCTR_VFD_1_HI, 21, A5XX_VFD_PERFCTR_VFD_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_2_LO,
+		A5XX_RBBM_PERFCTR_VFD_2_HI, 22, A5XX_VFD_PERFCTR_VFD_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_3_LO,
+		A5XX_RBBM_PERFCTR_VFD_3_HI, 23, A5XX_VFD_PERFCTR_VFD_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_4_LO,
+		A5XX_RBBM_PERFCTR_VFD_4_HI, 24, A5XX_VFD_PERFCTR_VFD_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_5_LO,
+		A5XX_RBBM_PERFCTR_VFD_5_HI, 25, A5XX_VFD_PERFCTR_VFD_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_6_LO,
+		A5XX_RBBM_PERFCTR_VFD_6_HI, 26, A5XX_VFD_PERFCTR_VFD_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VFD_7_LO,
+		A5XX_RBBM_PERFCTR_VFD_7_HI, 27, A5XX_VFD_PERFCTR_VFD_SEL_7 },
+};
+
+static struct adreno_perfcount_register a5xx_perfcounters_hlsq[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_0_LO,
+		A5XX_RBBM_PERFCTR_HLSQ_0_HI, 28, A5XX_HLSQ_PERFCTR_HLSQ_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_1_LO,
+		A5XX_RBBM_PERFCTR_HLSQ_1_HI, 29, A5XX_HLSQ_PERFCTR_HLSQ_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_2_LO,
+		A5XX_RBBM_PERFCTR_HLSQ_2_HI, 30, A5XX_HLSQ_PERFCTR_HLSQ_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_3_LO,
+		A5XX_RBBM_PERFCTR_HLSQ_3_HI, 31, A5XX_HLSQ_PERFCTR_HLSQ_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_4_LO,
+		A5XX_RBBM_PERFCTR_HLSQ_4_HI, 32, A5XX_HLSQ_PERFCTR_HLSQ_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_5_LO,
+		A5XX_RBBM_PERFCTR_HLSQ_5_HI, 33, A5XX_HLSQ_PERFCTR_HLSQ_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_6_LO,
+		A5XX_RBBM_PERFCTR_HLSQ_6_HI, 34, A5XX_HLSQ_PERFCTR_HLSQ_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_HLSQ_7_LO,
+		A5XX_RBBM_PERFCTR_HLSQ_7_HI, 35, A5XX_HLSQ_PERFCTR_HLSQ_SEL_7 },
+};
+
+static struct adreno_perfcount_register a5xx_perfcounters_vpc[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VPC_0_LO,
+		A5XX_RBBM_PERFCTR_VPC_0_HI, 36, A5XX_VPC_PERFCTR_VPC_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VPC_1_LO,
+		A5XX_RBBM_PERFCTR_VPC_1_HI, 37, A5XX_VPC_PERFCTR_VPC_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VPC_2_LO,
+		A5XX_RBBM_PERFCTR_VPC_2_HI, 38, A5XX_VPC_PERFCTR_VPC_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VPC_3_LO,
+		A5XX_RBBM_PERFCTR_VPC_3_HI, 39, A5XX_VPC_PERFCTR_VPC_SEL_3 },
+};
+
+static struct adreno_perfcount_register a5xx_perfcounters_ccu[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CCU_0_LO,
+		A5XX_RBBM_PERFCTR_CCU_0_HI, 40, A5XX_RB_PERFCTR_CCU_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CCU_1_LO,
+		A5XX_RBBM_PERFCTR_CCU_1_HI, 41, A5XX_RB_PERFCTR_CCU_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CCU_2_LO,
+		A5XX_RBBM_PERFCTR_CCU_2_HI, 42, A5XX_RB_PERFCTR_CCU_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CCU_3_LO,
+		A5XX_RBBM_PERFCTR_CCU_3_HI, 43, A5XX_RB_PERFCTR_CCU_SEL_3 },
+};
+
+static struct adreno_perfcount_register a5xx_perfcounters_tse[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TSE_0_LO,
+		A5XX_RBBM_PERFCTR_TSE_0_HI, 44, A5XX_GRAS_PERFCTR_TSE_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TSE_1_LO,
+		A5XX_RBBM_PERFCTR_TSE_1_HI, 45, A5XX_GRAS_PERFCTR_TSE_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TSE_2_LO,
+		A5XX_RBBM_PERFCTR_TSE_2_HI, 46, A5XX_GRAS_PERFCTR_TSE_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TSE_3_LO,
+		A5XX_RBBM_PERFCTR_TSE_3_HI, 47, A5XX_GRAS_PERFCTR_TSE_SEL_3 },
+};
+
+
+static struct adreno_perfcount_register a5xx_perfcounters_ras[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RAS_0_LO,
+		A5XX_RBBM_PERFCTR_RAS_0_HI, 48, A5XX_GRAS_PERFCTR_RAS_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RAS_1_LO,
+		A5XX_RBBM_PERFCTR_RAS_1_HI, 49, A5XX_GRAS_PERFCTR_RAS_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RAS_2_LO,
+		A5XX_RBBM_PERFCTR_RAS_2_HI, 50, A5XX_GRAS_PERFCTR_RAS_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RAS_3_LO,
+		A5XX_RBBM_PERFCTR_RAS_3_HI, 51, A5XX_GRAS_PERFCTR_RAS_SEL_3 },
+};
+
+static struct adreno_perfcount_register a5xx_perfcounters_uche[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_0_LO,
+		A5XX_RBBM_PERFCTR_UCHE_0_HI, 52, A5XX_UCHE_PERFCTR_UCHE_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_1_LO,
+		A5XX_RBBM_PERFCTR_UCHE_1_HI, 53, A5XX_UCHE_PERFCTR_UCHE_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_2_LO,
+		A5XX_RBBM_PERFCTR_UCHE_2_HI, 54, A5XX_UCHE_PERFCTR_UCHE_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_3_LO,
+		A5XX_RBBM_PERFCTR_UCHE_3_HI, 55, A5XX_UCHE_PERFCTR_UCHE_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_4_LO,
+		A5XX_RBBM_PERFCTR_UCHE_4_HI, 56, A5XX_UCHE_PERFCTR_UCHE_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_5_LO,
+		A5XX_RBBM_PERFCTR_UCHE_5_HI, 57, A5XX_UCHE_PERFCTR_UCHE_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_6_LO,
+		A5XX_RBBM_PERFCTR_UCHE_6_HI, 58, A5XX_UCHE_PERFCTR_UCHE_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_UCHE_7_LO,
+		A5XX_RBBM_PERFCTR_UCHE_7_HI, 59, A5XX_UCHE_PERFCTR_UCHE_SEL_7 },
+};
+
+static struct adreno_perfcount_register a5xx_perfcounters_tp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_0_LO,
+		A5XX_RBBM_PERFCTR_TP_0_HI, 60, A5XX_TPL1_PERFCTR_TP_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_1_LO,
+		A5XX_RBBM_PERFCTR_TP_1_HI, 61, A5XX_TPL1_PERFCTR_TP_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_2_LO,
+		A5XX_RBBM_PERFCTR_TP_2_HI, 62, A5XX_TPL1_PERFCTR_TP_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_3_LO,
+		A5XX_RBBM_PERFCTR_TP_3_HI, 63, A5XX_TPL1_PERFCTR_TP_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_4_LO,
+		A5XX_RBBM_PERFCTR_TP_4_HI, 64, A5XX_TPL1_PERFCTR_TP_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_5_LO,
+		A5XX_RBBM_PERFCTR_TP_5_HI, 65, A5XX_TPL1_PERFCTR_TP_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_6_LO,
+		A5XX_RBBM_PERFCTR_TP_6_HI, 66, A5XX_TPL1_PERFCTR_TP_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_TP_7_LO,
+		A5XX_RBBM_PERFCTR_TP_7_HI, 67, A5XX_TPL1_PERFCTR_TP_SEL_7 },
+};
+
+static struct adreno_perfcount_register a5xx_perfcounters_sp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_0_LO,
+		A5XX_RBBM_PERFCTR_SP_0_HI, 68, A5XX_SP_PERFCTR_SP_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_1_LO,
+		A5XX_RBBM_PERFCTR_SP_1_HI, 69, A5XX_SP_PERFCTR_SP_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_2_LO,
+		A5XX_RBBM_PERFCTR_SP_2_HI, 70, A5XX_SP_PERFCTR_SP_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_3_LO,
+		A5XX_RBBM_PERFCTR_SP_3_HI, 71, A5XX_SP_PERFCTR_SP_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_4_LO,
+		A5XX_RBBM_PERFCTR_SP_4_HI, 72, A5XX_SP_PERFCTR_SP_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_5_LO,
+		A5XX_RBBM_PERFCTR_SP_5_HI, 73, A5XX_SP_PERFCTR_SP_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_6_LO,
+		A5XX_RBBM_PERFCTR_SP_6_HI, 74, A5XX_SP_PERFCTR_SP_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_7_LO,
+		A5XX_RBBM_PERFCTR_SP_7_HI, 75, A5XX_SP_PERFCTR_SP_SEL_7 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_8_LO,
+		A5XX_RBBM_PERFCTR_SP_8_HI, 76, A5XX_SP_PERFCTR_SP_SEL_8 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_9_LO,
+		A5XX_RBBM_PERFCTR_SP_9_HI, 77, A5XX_SP_PERFCTR_SP_SEL_9 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_10_LO,
+		A5XX_RBBM_PERFCTR_SP_10_HI, 78, A5XX_SP_PERFCTR_SP_SEL_10 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_SP_11_LO,
+		A5XX_RBBM_PERFCTR_SP_11_HI, 79, A5XX_SP_PERFCTR_SP_SEL_11 },
+};
+
+static struct adreno_perfcount_register a5xx_perfcounters_rb[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_0_LO,
+		A5XX_RBBM_PERFCTR_RB_0_HI, 80, A5XX_RB_PERFCTR_RB_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_1_LO,
+		A5XX_RBBM_PERFCTR_RB_1_HI, 81, A5XX_RB_PERFCTR_RB_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_2_LO,
+		A5XX_RBBM_PERFCTR_RB_2_HI, 82, A5XX_RB_PERFCTR_RB_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_3_LO,
+		A5XX_RBBM_PERFCTR_RB_3_HI, 83, A5XX_RB_PERFCTR_RB_SEL_3 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_4_LO,
+		A5XX_RBBM_PERFCTR_RB_4_HI, 84, A5XX_RB_PERFCTR_RB_SEL_4 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_5_LO,
+		A5XX_RBBM_PERFCTR_RB_5_HI, 85, A5XX_RB_PERFCTR_RB_SEL_5 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_6_LO,
+		A5XX_RBBM_PERFCTR_RB_6_HI, 86, A5XX_RB_PERFCTR_RB_SEL_6 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RB_7_LO,
+		A5XX_RBBM_PERFCTR_RB_7_HI, 87, A5XX_RB_PERFCTR_RB_SEL_7 },
+};
+
+static struct adreno_perfcount_register a5xx_perfcounters_vsc[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VSC_0_LO,
+		A5XX_RBBM_PERFCTR_VSC_0_HI, 88, A5XX_VSC_PERFCTR_VSC_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_VSC_1_LO,
+		A5XX_RBBM_PERFCTR_VSC_1_HI, 89, A5XX_VSC_PERFCTR_VSC_SEL_1 },
+};
+
+static struct adreno_perfcount_register a5xx_perfcounters_lrz[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_LRZ_0_LO,
+		A5XX_RBBM_PERFCTR_LRZ_0_HI, 90, A5XX_GRAS_PERFCTR_LRZ_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_LRZ_1_LO,
+		A5XX_RBBM_PERFCTR_LRZ_1_HI, 91, A5XX_GRAS_PERFCTR_LRZ_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_LRZ_2_LO,
+		A5XX_RBBM_PERFCTR_LRZ_2_HI, 92, A5XX_GRAS_PERFCTR_LRZ_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_LRZ_3_LO,
+		A5XX_RBBM_PERFCTR_LRZ_3_HI, 93, A5XX_GRAS_PERFCTR_LRZ_SEL_3 },
+};
+
+static struct adreno_perfcount_register a5xx_perfcounters_cmp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CMP_0_LO,
+		A5XX_RBBM_PERFCTR_CMP_0_HI, 94, A5XX_RB_PERFCTR_CMP_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CMP_1_LO,
+		A5XX_RBBM_PERFCTR_CMP_1_HI, 95, A5XX_RB_PERFCTR_CMP_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CMP_2_LO,
+		A5XX_RBBM_PERFCTR_CMP_2_HI, 96, A5XX_RB_PERFCTR_CMP_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_CMP_3_LO,
+		A5XX_RBBM_PERFCTR_CMP_3_HI, 97, A5XX_RB_PERFCTR_CMP_SEL_3 },
+};
+
+static struct adreno_perfcount_register a5xx_perfcounters_vbif[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_CNT_LOW0,
+		A5XX_VBIF_PERF_CNT_HIGH0, -1, A5XX_VBIF_PERF_CNT_SEL0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_CNT_LOW1,
+		A5XX_VBIF_PERF_CNT_HIGH1, -1, A5XX_VBIF_PERF_CNT_SEL1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_CNT_LOW2,
+		A5XX_VBIF_PERF_CNT_HIGH2, -1, A5XX_VBIF_PERF_CNT_SEL2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_CNT_LOW3,
+		A5XX_VBIF_PERF_CNT_HIGH3, -1, A5XX_VBIF_PERF_CNT_SEL3 },
+};
+
+static struct adreno_perfcount_register a5xx_perfcounters_vbif_pwr[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_PWR_CNT_LOW0,
+		A5XX_VBIF_PERF_PWR_CNT_HIGH0, -1, A5XX_VBIF_PERF_PWR_CNT_EN0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_PWR_CNT_LOW1,
+		A5XX_VBIF_PERF_PWR_CNT_HIGH1, -1, A5XX_VBIF_PERF_PWR_CNT_EN1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_VBIF_PERF_PWR_CNT_LOW2,
+		A5XX_VBIF_PERF_PWR_CNT_HIGH2, -1, A5XX_VBIF_PERF_PWR_CNT_EN2 },
+};
+
+static struct adreno_perfcount_register a5xx_perfcounters_alwayson[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_ALWAYSON_COUNTER_LO,
+		A5XX_RBBM_ALWAYSON_COUNTER_HI, -1 },
+};
+
+/*
+ * 5XX targets don't really have physical PERFCTR_PWR registers - we emulate
+ * them using similar performance counters from the RBBM block. The difference
+ * between using this group and the RBBM group is that the RBBM counters are
+ * reloaded after a power collapse which is not how the PWR counters behaved on
+ * legacy hardware. In order to limit the disruption on the rest of the system
+ * we go out of our way to ensure backwards compatibility. Since RBBM counters
+ * are in short supply, we don't emulate PWR:0 which nobody uses - mark it as
+ * broken.
+ */
+static struct adreno_perfcount_register a5xx_perfcounters_pwr[] = {
+	{ KGSL_PERFCOUNTER_BROKEN, 0, 0, 0, 0, -1, 0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RBBM_PERFCTR_RBBM_0_LO,
+		A5XX_RBBM_PERFCTR_RBBM_0_HI, -1, 0},
+};
+
+static struct adreno_perfcount_register a5xx_pwrcounters_sp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_SP_POWER_COUNTER_0_LO,
+		A5XX_SP_POWER_COUNTER_0_HI, -1, A5XX_SP_POWERCTR_SP_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_SP_POWER_COUNTER_1_LO,
+		A5XX_SP_POWER_COUNTER_1_HI, -1, A5XX_SP_POWERCTR_SP_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_SP_POWER_COUNTER_2_LO,
+		A5XX_SP_POWER_COUNTER_2_HI, -1, A5XX_SP_POWERCTR_SP_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_SP_POWER_COUNTER_3_LO,
+		A5XX_SP_POWER_COUNTER_3_HI, -1, A5XX_SP_POWERCTR_SP_SEL_3 },
+};
+
+static struct adreno_perfcount_register a5xx_pwrcounters_tp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_TP_POWER_COUNTER_0_LO,
+		A5XX_TP_POWER_COUNTER_0_HI, -1, A5XX_TPL1_POWERCTR_TP_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_TP_POWER_COUNTER_1_LO,
+		A5XX_TP_POWER_COUNTER_1_HI, -1, A5XX_TPL1_POWERCTR_TP_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_TP_POWER_COUNTER_2_LO,
+		A5XX_TP_POWER_COUNTER_2_HI, -1, A5XX_TPL1_POWERCTR_TP_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_TP_POWER_COUNTER_3_LO,
+		A5XX_TP_POWER_COUNTER_3_HI, -1, A5XX_TPL1_POWERCTR_TP_SEL_3 },
+};
+
+static struct adreno_perfcount_register a5xx_pwrcounters_rb[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RB_POWER_COUNTER_0_LO,
+		A5XX_RB_POWER_COUNTER_0_HI, -1, A5XX_RB_POWERCTR_RB_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RB_POWER_COUNTER_1_LO,
+		A5XX_RB_POWER_COUNTER_1_HI, -1, A5XX_RB_POWERCTR_RB_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RB_POWER_COUNTER_2_LO,
+		A5XX_RB_POWER_COUNTER_2_HI, -1, A5XX_RB_POWERCTR_RB_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_RB_POWER_COUNTER_3_LO,
+		A5XX_RB_POWER_COUNTER_3_HI, -1, A5XX_RB_POWERCTR_RB_SEL_3 },
+};
+
+static struct adreno_perfcount_register a5xx_pwrcounters_ccu[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CCU_POWER_COUNTER_0_LO,
+		A5XX_CCU_POWER_COUNTER_0_HI, -1, A5XX_RB_POWERCTR_CCU_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CCU_POWER_COUNTER_1_LO,
+		A5XX_CCU_POWER_COUNTER_1_HI, -1, A5XX_RB_POWERCTR_CCU_SEL_1 },
+};
+
+static struct adreno_perfcount_register a5xx_pwrcounters_uche[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_UCHE_POWER_COUNTER_0_LO,
+		A5XX_UCHE_POWER_COUNTER_0_HI, -1,
+		A5XX_UCHE_POWERCTR_UCHE_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_UCHE_POWER_COUNTER_1_LO,
+		A5XX_UCHE_POWER_COUNTER_1_HI, -1,
+		A5XX_UCHE_POWERCTR_UCHE_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_UCHE_POWER_COUNTER_2_LO,
+		A5XX_UCHE_POWER_COUNTER_2_HI, -1,
+		A5XX_UCHE_POWERCTR_UCHE_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_UCHE_POWER_COUNTER_3_LO,
+		A5XX_UCHE_POWER_COUNTER_3_HI, -1,
+		A5XX_UCHE_POWERCTR_UCHE_SEL_3 },
+};
+
+static struct adreno_perfcount_register a5xx_pwrcounters_cp[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CP_POWER_COUNTER_0_LO,
+		A5XX_CP_POWER_COUNTER_0_HI, -1, A5XX_CP_POWERCTR_CP_SEL_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CP_POWER_COUNTER_1_LO,
+		A5XX_CP_POWER_COUNTER_1_HI, -1, A5XX_CP_POWERCTR_CP_SEL_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CP_POWER_COUNTER_2_LO,
+		A5XX_CP_POWER_COUNTER_2_HI, -1, A5XX_CP_POWERCTR_CP_SEL_2 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_CP_POWER_COUNTER_3_LO,
+		A5XX_CP_POWER_COUNTER_3_HI, -1, A5XX_CP_POWERCTR_CP_SEL_3 },
+};
+
+static struct adreno_perfcount_register a5xx_pwrcounters_gpmu[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_0_LO,
+		A5XX_GPMU_POWER_COUNTER_0_HI, -1,
+		A5XX_GPMU_POWER_COUNTER_SELECT_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_1_LO,
+		A5XX_GPMU_POWER_COUNTER_1_HI, -1,
+		A5XX_GPMU_POWER_COUNTER_SELECT_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_2_LO,
+		A5XX_GPMU_POWER_COUNTER_2_HI, -1,
+		A5XX_GPMU_POWER_COUNTER_SELECT_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_3_LO,
+		A5XX_GPMU_POWER_COUNTER_3_HI, -1,
+		A5XX_GPMU_POWER_COUNTER_SELECT_0 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_4_LO,
+		A5XX_GPMU_POWER_COUNTER_4_HI, -1,
+		A5XX_GPMU_POWER_COUNTER_SELECT_1 },
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_POWER_COUNTER_5_LO,
+		A5XX_GPMU_POWER_COUNTER_5_HI, -1,
+		A5XX_GPMU_POWER_COUNTER_SELECT_1 },
+};
+
+static struct adreno_perfcount_register a5xx_pwrcounters_alwayson[] = {
+	{ KGSL_PERFCOUNTER_NOT_USED, 0, 0, A5XX_GPMU_ALWAYS_ON_COUNTER_LO,
+		A5XX_GPMU_ALWAYS_ON_COUNTER_HI, -1 },
+};
+
+#define A5XX_PERFCOUNTER_GROUP(offset, name) \
+	ADRENO_PERFCOUNTER_GROUP(a5xx, offset, name)
+
+#define A5XX_PERFCOUNTER_GROUP_FLAGS(offset, name, flags) \
+	ADRENO_PERFCOUNTER_GROUP_FLAGS(a5xx, offset, name, flags)
+
+#define A5XX_POWER_COUNTER_GROUP(offset, name) \
+	ADRENO_POWER_COUNTER_GROUP(a5xx, offset, name)
+
+static struct adreno_perfcount_group a5xx_perfcounter_groups
+				[KGSL_PERFCOUNTER_GROUP_MAX] = {
+	A5XX_PERFCOUNTER_GROUP(CP, cp),
+	A5XX_PERFCOUNTER_GROUP(RBBM, rbbm),
+	A5XX_PERFCOUNTER_GROUP(PC, pc),
+	A5XX_PERFCOUNTER_GROUP(VFD, vfd),
+	A5XX_PERFCOUNTER_GROUP(HLSQ, hlsq),
+	A5XX_PERFCOUNTER_GROUP(VPC, vpc),
+	A5XX_PERFCOUNTER_GROUP(CCU, ccu),
+	A5XX_PERFCOUNTER_GROUP(CMP, cmp),
+	A5XX_PERFCOUNTER_GROUP(TSE, tse),
+	A5XX_PERFCOUNTER_GROUP(RAS, ras),
+	A5XX_PERFCOUNTER_GROUP(LRZ, lrz),
+	A5XX_PERFCOUNTER_GROUP(UCHE, uche),
+	A5XX_PERFCOUNTER_GROUP(TP, tp),
+	A5XX_PERFCOUNTER_GROUP(SP, sp),
+	A5XX_PERFCOUNTER_GROUP(RB, rb),
+	A5XX_PERFCOUNTER_GROUP(VSC, vsc),
+	A5XX_PERFCOUNTER_GROUP_FLAGS(PWR, pwr,
+		ADRENO_PERFCOUNTER_GROUP_FIXED),
+	A5XX_PERFCOUNTER_GROUP(VBIF, vbif),
+	A5XX_PERFCOUNTER_GROUP_FLAGS(VBIF_PWR, vbif_pwr,
+		ADRENO_PERFCOUNTER_GROUP_FIXED),
+	A5XX_PERFCOUNTER_GROUP_FLAGS(ALWAYSON, alwayson,
+		ADRENO_PERFCOUNTER_GROUP_FIXED),
+	A5XX_POWER_COUNTER_GROUP(SP, sp),
+	A5XX_POWER_COUNTER_GROUP(TP, tp),
+	A5XX_POWER_COUNTER_GROUP(RB, rb),
+	A5XX_POWER_COUNTER_GROUP(CCU, ccu),
+	A5XX_POWER_COUNTER_GROUP(UCHE, uche),
+	A5XX_POWER_COUNTER_GROUP(CP, cp),
+	A5XX_POWER_COUNTER_GROUP(GPMU, gpmu),
+	A5XX_POWER_COUNTER_GROUP(ALWAYSON, alwayson),
+};
+
+static struct adreno_perfcounters a5xx_perfcounters = {
+	a5xx_perfcounter_groups,
+	ARRAY_SIZE(a5xx_perfcounter_groups),
+};
+
+static struct adreno_ft_perf_counters a5xx_ft_perf_counters[] = {
+	{KGSL_PERFCOUNTER_GROUP_SP, A5XX_SP_ALU_ACTIVE_CYCLES},
+	{KGSL_PERFCOUNTER_GROUP_SP, A5XX_SP0_ICL1_MISSES},
+	{KGSL_PERFCOUNTER_GROUP_SP, A5XX_SP_FS_CFLOW_INSTRUCTIONS},
+	{KGSL_PERFCOUNTER_GROUP_TSE, A5XX_TSE_INPUT_PRIM_NUM},
+};
+
+static unsigned int a5xx_int_bits[ADRENO_INT_BITS_MAX] = {
+	ADRENO_INT_DEFINE(ADRENO_INT_RBBM_AHB_ERROR, A5XX_INT_RBBM_AHB_ERROR),
+};
+
+/* Register offset defines for A5XX, in order of enum adreno_regs */
+static unsigned int a5xx_register_offsets[ADRENO_REG_REGISTER_MAX] = {
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_WFI_PEND_CTR, A5XX_CP_WFI_PEND_CTR),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE, A5XX_CP_RB_BASE),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_BASE_HI, A5XX_CP_RB_BASE_HI),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR_ADDR_LO,
+			A5XX_CP_RB_RPTR_ADDR_LO),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR_ADDR_HI,
+			A5XX_CP_RB_RPTR_ADDR_HI),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_RPTR, A5XX_CP_RB_RPTR),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_WPTR, A5XX_CP_RB_WPTR),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_CNTL, A5XX_CP_CNTL),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_ME_CNTL, A5XX_CP_ME_CNTL),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_RB_CNTL, A5XX_CP_RB_CNTL),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE, A5XX_CP_IB1_BASE),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BASE_HI, A5XX_CP_IB1_BASE_HI),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB1_BUFSZ, A5XX_CP_IB1_BUFSZ),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE, A5XX_CP_IB2_BASE),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BASE_HI, A5XX_CP_IB2_BASE_HI),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_IB2_BUFSZ, A5XX_CP_IB2_BUFSZ),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_ROQ_ADDR, A5XX_CP_ROQ_DBG_ADDR),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_ROQ_DATA, A5XX_CP_ROQ_DBG_DATA),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_MERCIU_ADDR, A5XX_CP_MERCIU_DBG_ADDR),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_MERCIU_DATA, A5XX_CP_MERCIU_DBG_DATA_1),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_MERCIU_DATA2,
+				A5XX_CP_MERCIU_DBG_DATA_2),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_MEQ_ADDR, A5XX_CP_MEQ_DBG_ADDR),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_MEQ_DATA, A5XX_CP_MEQ_DBG_DATA),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_PROTECT_REG_0, A5XX_CP_PROTECT_REG_0),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_PREEMPT, A5XX_CP_CONTEXT_SWITCH_CNTL),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_PREEMPT_DEBUG, ADRENO_REG_SKIP),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_PREEMPT_DISABLE, ADRENO_REG_SKIP),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_LO,
+				A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_LO),
+	ADRENO_REG_DEFINE(ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_HI,
+				A5XX_CP_CONTEXT_SWITCH_SMMU_INFO_HI),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_STATUS, A5XX_RBBM_STATUS),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_STATUS3, A5XX_RBBM_STATUS3),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_CTL, A5XX_RBBM_PERFCTR_CNTL),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_CMD0,
+					A5XX_RBBM_PERFCTR_LOAD_CMD0),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_CMD1,
+					A5XX_RBBM_PERFCTR_LOAD_CMD1),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_CMD2,
+					A5XX_RBBM_PERFCTR_LOAD_CMD2),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_CMD3,
+					A5XX_RBBM_PERFCTR_LOAD_CMD3),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_0_MASK, A5XX_RBBM_INT_0_MASK),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_0_STATUS, A5XX_RBBM_INT_0_STATUS),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_CLOCK_CTL, A5XX_RBBM_CLOCK_CNTL),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_INT_CLEAR_CMD,
+				A5XX_RBBM_INT_CLEAR_CMD),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SW_RESET_CMD, A5XX_RBBM_SW_RESET_CMD),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_BLOCK_SW_RESET_CMD,
+					  A5XX_RBBM_BLOCK_SW_RESET_CMD),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_BLOCK_SW_RESET_CMD2,
+					  A5XX_RBBM_BLOCK_SW_RESET_CMD2),
+	ADRENO_REG_DEFINE(ADRENO_REG_UCHE_INVALIDATE0, A5XX_UCHE_INVALIDATE0),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_LO,
+				A5XX_RBBM_PERFCTR_LOAD_VALUE_LO),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_HI,
+				A5XX_RBBM_PERFCTR_LOAD_VALUE_HI),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SECVID_TRUST_CONTROL,
+				A5XX_RBBM_SECVID_TRUST_CNTL),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SECVID_TRUST_CONFIG,
+				A5XX_RBBM_SECVID_TRUST_CONFIG),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SECVID_TSB_CONTROL,
+				A5XX_RBBM_SECVID_TSB_CNTL),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SECVID_TSB_TRUSTED_BASE,
+				A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_LO),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SECVID_TSB_TRUSTED_BASE_HI,
+				A5XX_RBBM_SECVID_TSB_TRUSTED_BASE_HI),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_SECVID_TSB_TRUSTED_SIZE,
+				A5XX_RBBM_SECVID_TSB_TRUSTED_SIZE),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_ALWAYSON_COUNTER_LO,
+				A5XX_RBBM_ALWAYSON_COUNTER_LO),
+	ADRENO_REG_DEFINE(ADRENO_REG_RBBM_ALWAYSON_COUNTER_HI,
+				A5XX_RBBM_ALWAYSON_COUNTER_HI),
+	ADRENO_REG_DEFINE(ADRENO_REG_VBIF_XIN_HALT_CTRL0,
+				A5XX_VBIF_XIN_HALT_CTRL0),
+	ADRENO_REG_DEFINE(ADRENO_REG_VBIF_XIN_HALT_CTRL1,
+				A5XX_VBIF_XIN_HALT_CTRL1),
+	ADRENO_REG_DEFINE(ADRENO_REG_VBIF_VERSION,
+				A5XX_VBIF_VERSION),
+};
+
+static const struct adreno_reg_offsets a5xx_reg_offsets = {
+	.offsets = a5xx_register_offsets,
+	.offset_0 = ADRENO_REG_REGISTER_MAX,
+};
+
+static void a5xx_cp_hw_err_callback(struct adreno_device *adreno_dev, int bit)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int status1, status2;
+
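+	/*
+	 * CP_HW_ERROR is an aggregate interrupt; read the CP interrupt
+	 * status to find out which specific fault fired.
+	 */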
+	kgsl_regread(device, A5XX_CP_INTERRUPT_STATUS, &status1);
+
+	if (status1 & BIT(A5XX_CP_OPCODE_ERROR)) {
+		unsigned int val;
+
+		kgsl_regwrite(device, A5XX_CP_PFP_STAT_ADDR, 0);
+
+		/*
+		 * A5XX_CP_PFP_STAT_DATA is indexed, so read it twice to get the
+		 * value we want
+		 */
+		kgsl_regread(device, A5XX_CP_PFP_STAT_DATA, &val);
+		kgsl_regread(device, A5XX_CP_PFP_STAT_DATA, &val);
+
+		KGSL_DRV_CRIT_RATELIMIT(device,
+			"ringbuffer opcode error | possible opcode=0x%8.8X\n",
+			val);
+	}
+	if (status1 & BIT(A5XX_CP_RESERVED_BIT_ERROR))
+		KGSL_DRV_CRIT_RATELIMIT(device,
+					"ringbuffer reserved bit error interrupt\n");
+	if (status1 & BIT(A5XX_CP_HW_FAULT_ERROR)) {
+		kgsl_regread(device, A5XX_CP_HW_FAULT, &status2);
+		KGSL_DRV_CRIT_RATELIMIT(device,
+					"CP | Ringbuffer HW fault | status=%x\n",
+					status2);
+	}
+	if (status1 & BIT(A5XX_CP_DMA_ERROR))
+		KGSL_DRV_CRIT_RATELIMIT(device, "CP | DMA error\n");
+	if (status1 & BIT(A5XX_CP_REGISTER_PROTECTION_ERROR)) {
+		kgsl_regread(device, A5XX_CP_PROTECT_STATUS, &status2);
+		KGSL_DRV_CRIT_RATELIMIT(device,
+					"CP | Protected mode error| %s | addr=%x | status=%x\n",
+					status2 & (1 << 24) ? "WRITE" : "READ",
+					(status2 & 0xFFFFF) >> 2, status2);
+	}
+	if (status1 & BIT(A5XX_CP_AHB_ERROR)) {
+		kgsl_regread(device, A5XX_CP_AHB_FAULT, &status2);
+		KGSL_DRV_CRIT_RATELIMIT(device,
+					"ringbuffer AHB error interrupt | status=%x\n",
+					status2);
+	}
+}
+
+static void a5xx_err_callback(struct adreno_device *adreno_dev, int bit)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int reg;
+
+	switch (bit) {
+	case A5XX_INT_RBBM_AHB_ERROR: {
+		kgsl_regread(device, A5XX_RBBM_AHB_ERROR_STATUS, &reg);
+
+		/*
+		 * Return the word address of the erroring register so that it
+		 * matches the register specification
+		 */
+		KGSL_DRV_CRIT_RATELIMIT(device,
+			"RBBM | AHB bus error | %s | addr=%x | ports=%x:%x\n",
+			reg & (1 << 28) ? "WRITE" : "READ",
+			(reg & 0xFFFFF) >> 2, (reg >> 20) & 0x3,
+			(reg >> 24) & 0xF);
+
+		/* Clear the error */
+		kgsl_regwrite(device, A5XX_RBBM_AHB_CMD, (1 << 4));
+		break;
+	}
+	case A5XX_INT_RBBM_TRANSFER_TIMEOUT:
+		KGSL_DRV_CRIT_RATELIMIT(device, "RBBM: AHB transfer timeout\n");
+		break;
+	case A5XX_INT_RBBM_ME_MS_TIMEOUT:
+		kgsl_regread(device, A5XX_RBBM_AHB_ME_SPLIT_STATUS, &reg);
+		KGSL_DRV_CRIT_RATELIMIT(device,
+			"RBBM | ME master split timeout | status=%x\n", reg);
+		break;
+	case A5XX_INT_RBBM_PFP_MS_TIMEOUT:
+		kgsl_regread(device, A5XX_RBBM_AHB_PFP_SPLIT_STATUS, &reg);
+		KGSL_DRV_CRIT_RATELIMIT(device,
+			"RBBM | PFP master split timeout | status=%x\n", reg);
+		break;
+	case A5XX_INT_RBBM_ETS_MS_TIMEOUT:
+		KGSL_DRV_CRIT_RATELIMIT(device,
+			"RBBM: ETS master split timeout\n");
+		break;
+	case A5XX_INT_RBBM_ATB_ASYNC_OVERFLOW:
+		KGSL_DRV_CRIT_RATELIMIT(device, "RBBM: ATB ASYNC overflow\n");
+		break;
+	case A5XX_INT_RBBM_ATB_BUS_OVERFLOW:
+		KGSL_DRV_CRIT_RATELIMIT(device, "RBBM: ATB bus overflow\n");
+		break;
+	case A5XX_INT_UCHE_OOB_ACCESS:
+		KGSL_DRV_CRIT_RATELIMIT(device, "UCHE: Out of bounds access\n");
+		break;
+	case A5XX_INT_UCHE_TRAP_INTR:
+		KGSL_DRV_CRIT_RATELIMIT(device, "UCHE: Trap interrupt\n");
+		break;
+	case A5XX_INT_GPMU_VOLTAGE_DROOP:
+		KGSL_DRV_CRIT_RATELIMIT(device, "GPMU: Voltage droop\n");
+		break;
+	default:
+		KGSL_DRV_CRIT_RATELIMIT(device, "Unknown interrupt %d\n", bit);
+	}
+}
+
+static void a5xx_irq_storm_worker(struct work_struct *work)
+{
+	struct adreno_device *adreno_dev = container_of(work,
+			struct adreno_device, irq_storm_work);
+	struct kgsl_device *device = &adreno_dev->dev;
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	unsigned int status;
+
+	mutex_lock(&device->mutex);
+
+	/* Wait for the storm to clear up */
+	do {
+		adreno_writereg(adreno_dev, ADRENO_REG_RBBM_INT_CLEAR_CMD,
+				BIT(A5XX_INT_CP_CACHE_FLUSH_TS));
+		adreno_readreg(adreno_dev, ADRENO_REG_RBBM_INT_0_STATUS,
+				&status);
+	} while (status & BIT(A5XX_INT_CP_CACHE_FLUSH_TS));
+
+	/* Re-enable the interrupt bit in the mask */
+	gpudev->irq->mask |= BIT(A5XX_INT_CP_CACHE_FLUSH_TS);
+	adreno_writereg(adreno_dev, ADRENO_REG_RBBM_INT_0_MASK,
+			gpudev->irq->mask);
+	clear_bit(ADRENO_DEVICE_CACHE_FLUSH_TS_SUSPENDED, &adreno_dev->priv);
+
+	KGSL_DRV_WARN(device, "Re-enabled A5XX_INT_CP_CACHE_FLUSH_TS");
+	mutex_unlock(&device->mutex);
+
+	/* Reschedule just to make sure everything retires */
+	adreno_dispatcher_schedule(device);
+}
+
+static void a5xx_cp_callback(struct adreno_device *adreno_dev, int bit)
+{
+	struct kgsl_device *device = &adreno_dev->dev;
+	unsigned int cur;
+	static unsigned int count;
+	static unsigned int prev;
+
+	if (test_bit(ADRENO_DEVICE_CACHE_FLUSH_TS_SUSPENDED, &adreno_dev->priv))
+		return;
+
+	kgsl_sharedmem_readl(&device->memstore, &cur,
+			KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
+				ref_wait_ts));
+
+	/*
+	 * prev holds the value read from memory on the previous interrupt.
+	 * It should be changed by the GPU with every interrupt. If the
+	 * value we know about and the value we just read are the same, then
+	 * we are likely in a storm. If this happens twice, disable the
+	 * interrupt in the mask so the dispatcher can take care of the
+	 * issue. It is then up to the dispatcher to re-enable the mask once
+	 * all work is done and the storm has ended.
+	 */
+	if (prev == cur) {
+		count++;
+		if (count == 2) {
+			struct adreno_gpudev *gpudev =
+				ADRENO_GPU_DEVICE(adreno_dev);
+
+			/* disable interrupt from the mask */
+			set_bit(ADRENO_DEVICE_CACHE_FLUSH_TS_SUSPENDED,
+					&adreno_dev->priv);
+			gpudev->irq->mask &= ~BIT(A5XX_INT_CP_CACHE_FLUSH_TS);
+			adreno_writereg(adreno_dev, ADRENO_REG_RBBM_INT_0_MASK,
+					gpudev->irq->mask);
+
+			kgsl_schedule_work(&adreno_dev->irq_storm_work);
+
+			return;
+		}
+	} else {
+		count = 0;
+		prev = cur;
+	}
+
+	a5xx_preemption_trigger(adreno_dev);
+	adreno_dispatcher_schedule(device);
+}
+
+static const char *gpmu_int_msg[32] = {
+	[FW_INTR_INFO] = "FW_INTR_INFO",
+	[LLM_ACK_ERR_INTR] = "LLM_ACK_ERR_INTR",
+	[ISENS_TRIM_ERR_INTR] = "ISENS_TRIM_ERR_INTR",
+	[ISENS_ERR_INTR] = "ISENS_ERR_INTR",
+	[ISENS_IDLE_ERR_INTR] = "ISENS_IDLE_ERR_INTR",
+	[ISENS_PWR_ON_ERR_INTR] = "ISENS_PWR_ON_ERR_INTR",
+	[6 ... 30] = "",
+	[WDOG_EXPITED] = "WDOG_EXPITED"};
+
+static void a5xx_gpmu_int_callback(struct adreno_device *adreno_dev, int bit)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int reg, i;
+
+	kgsl_regread(device, A5XX_GPMU_RBBM_INTR_INFO, &reg);
+
+	if (reg & (~VALID_GPMU_IRQ)) {
+		KGSL_DRV_CRIT_RATELIMIT(device,
+			"GPMU: Unknown IRQ mask 0x%08lx in 0x%08x\n",
+				reg & (~VALID_GPMU_IRQ), reg);
+	}
+
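+	/*
+	 * Log every asserted GPMU interrupt that we know about; a watchdog
+	 * expiry additionally resets the GPMU CM3 and schedules the GPMU
+	 * recovery work.
+	 */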
+	for (i = 0; i < 32; i++)
+		switch (reg & BIT(i)) {
+		case BIT(WDOG_EXPITED):
+			if (test_and_clear_bit(ADRENO_DEVICE_GPMU_INITIALIZED,
+				&adreno_dev->priv)) {
+				/* Stop GPMU */
+				kgsl_regwrite(device,
+					A5XX_GPMU_CM3_SYSRESET, 1);
+				kgsl_schedule_work(&adreno_dev->gpmu_work);
+			}
+			/* fallthrough */
+		case BIT(FW_INTR_INFO):
+		case BIT(LLM_ACK_ERR_INTR):
+		case BIT(ISENS_TRIM_ERR_INTR):
+		case BIT(ISENS_ERR_INTR):
+		case BIT(ISENS_IDLE_ERR_INTR):
+		case BIT(ISENS_PWR_ON_ERR_INTR):
+			KGSL_DRV_CRIT_RATELIMIT(device,
+				"GPMU: interrupt %s(%08lx)\n",
+				gpmu_int_msg[i],
+				BIT(i));
+			break;
+	}
+}
+
+/*
+ * a5x_gpc_err_int_callback() - ISR for GPC error interrupts
+ * @adreno_dev: Pointer to device
+ * @bit: Interrupt bit
+ */
+void a5x_gpc_err_int_callback(struct adreno_device *adreno_dev, int bit)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	/*
+	 * A GPC error is typically the result of a SW programming mistake.
+	 * Force a GPU fault for this interrupt so that we can debug it
+	 * with the help of a register dump.
+	 */
+
+	KGSL_DRV_CRIT(device, "RBBM: GPC error\n");
+	adreno_irqctrl(adreno_dev, 0);
+
+	/* Trigger a fault in the dispatcher - this will effect a restart */
+	adreno_set_gpu_fault(adreno_dev, ADRENO_SOFT_FAULT);
+	adreno_dispatcher_schedule(device);
+}
+
+#define A5XX_INT_MASK \
+	((1 << A5XX_INT_RBBM_AHB_ERROR) |		\
+	 (1 << A5XX_INT_RBBM_TRANSFER_TIMEOUT) |		\
+	 (1 << A5XX_INT_RBBM_ME_MS_TIMEOUT) |		\
+	 (1 << A5XX_INT_RBBM_PFP_MS_TIMEOUT) |		\
+	 (1 << A5XX_INT_RBBM_ETS_MS_TIMEOUT) |		\
+	 (1 << A5XX_INT_RBBM_ATB_ASYNC_OVERFLOW) |		\
+	 (1 << A5XX_INT_RBBM_GPC_ERROR) |		\
+	 (1 << A5XX_INT_CP_HW_ERROR) |	\
+	 (1 << A5XX_INT_CP_CACHE_FLUSH_TS) |		\
+	 (1 << A5XX_INT_RBBM_ATB_BUS_OVERFLOW) |	\
+	 (1 << A5XX_INT_UCHE_OOB_ACCESS) |		\
+	 (1 << A5XX_INT_UCHE_TRAP_INTR) |		\
+	 (1 << A5XX_INT_CP_SW) |			\
+	 (1 << A5XX_INT_GPMU_FIRMWARE) |                \
+	 (1 << A5XX_INT_GPMU_VOLTAGE_DROOP))
+
+
+static struct adreno_irq_funcs a5xx_irq_funcs[32] = {
+	ADRENO_IRQ_CALLBACK(NULL),              /* 0 - RBBM_GPU_IDLE */
+	ADRENO_IRQ_CALLBACK(a5xx_err_callback), /* 1 - RBBM_AHB_ERROR */
+	ADRENO_IRQ_CALLBACK(a5xx_err_callback), /* 2 - RBBM_TRANSFER_TIMEOUT */
+	/* 3 - RBBM_ME_MASTER_SPLIT_TIMEOUT  */
+	ADRENO_IRQ_CALLBACK(a5xx_err_callback),
+	/* 4 - RBBM_PFP_MASTER_SPLIT_TIMEOUT */
+	ADRENO_IRQ_CALLBACK(a5xx_err_callback),
+	 /* 5 - RBBM_ETS_MASTER_SPLIT_TIMEOUT */
+	ADRENO_IRQ_CALLBACK(a5xx_err_callback),
+	/* 6 - RBBM_ATB_ASYNC_OVERFLOW */
+	ADRENO_IRQ_CALLBACK(a5xx_err_callback),
+	ADRENO_IRQ_CALLBACK(a5x_gpc_err_int_callback), /* 7 - GPC_ERR */
+	ADRENO_IRQ_CALLBACK(a5xx_preempt_callback),/* 8 - CP_SW */
+	ADRENO_IRQ_CALLBACK(a5xx_cp_hw_err_callback), /* 9 - CP_HW_ERROR */
+	/* 10 - CP_CCU_FLUSH_DEPTH_TS */
+	ADRENO_IRQ_CALLBACK(NULL),
+	 /* 11 - CP_CCU_FLUSH_COLOR_TS */
+	ADRENO_IRQ_CALLBACK(NULL),
+	 /* 12 - CP_CCU_RESOLVE_TS */
+	ADRENO_IRQ_CALLBACK(NULL),
+	ADRENO_IRQ_CALLBACK(NULL), /* 13 - CP_IB2_INT */
+	ADRENO_IRQ_CALLBACK(NULL), /* 14 - CP_IB1_INT */
+	ADRENO_IRQ_CALLBACK(NULL), /* 15 - CP_RB_INT */
+	/* 16 - CCP_UNUSED_1 */
+	ADRENO_IRQ_CALLBACK(NULL),
+	ADRENO_IRQ_CALLBACK(NULL), /* 17 - CP_RB_DONE_TS */
+	ADRENO_IRQ_CALLBACK(NULL), /* 18 - CP_WT_DONE_TS */
+	ADRENO_IRQ_CALLBACK(NULL), /* 19 - UNKNOWN_1 */
+	ADRENO_IRQ_CALLBACK(a5xx_cp_callback), /* 20 - CP_CACHE_FLUSH_TS */
+	/* 21 - UNUSED_2 */
+	ADRENO_IRQ_CALLBACK(NULL),
+	ADRENO_IRQ_CALLBACK(a5xx_err_callback), /* 22 - RBBM_ATB_BUS_OVERFLOW */
+	/* 23 - MISC_HANG_DETECT */
+	ADRENO_IRQ_CALLBACK(adreno_hang_int_callback),
+	ADRENO_IRQ_CALLBACK(a5xx_err_callback), /* 24 - UCHE_OOB_ACCESS */
+	ADRENO_IRQ_CALLBACK(a5xx_err_callback), /* 25 - UCHE_TRAP_INTR */
+	ADRENO_IRQ_CALLBACK(NULL), /* 26 - DEBBUS_INTR_0 */
+	ADRENO_IRQ_CALLBACK(NULL), /* 27 - DEBBUS_INTR_1 */
+	ADRENO_IRQ_CALLBACK(a5xx_err_callback), /* 28 - GPMU_VOLTAGE_DROOP */
+	ADRENO_IRQ_CALLBACK(a5xx_gpmu_int_callback), /* 29 - GPMU_FIRMWARE */
+	ADRENO_IRQ_CALLBACK(NULL), /* 30 - ISDB_CPU_IRQ */
+	ADRENO_IRQ_CALLBACK(NULL), /* 31 - ISDB_UNDER_DEBUG */
+};
+
+static struct adreno_irq a5xx_irq = {
+	.funcs = a5xx_irq_funcs,
+	.mask = A5XX_INT_MASK,
+};
+
+/*
+ * Default sizes for the CP queues on A5xx targets. These values must be
+ * overridden in the platform_setup function for A5xx derivatives whose
+ * sizes differ.
+ */
+static struct adreno_snapshot_sizes a5xx_snap_sizes = {
+	.cp_pfp = 36,
+	.cp_me = 29,
+	.cp_meq = 64,
+	.cp_merciu = 64,
+	.roq = 512,
+};
+
+static struct adreno_snapshot_data a5xx_snapshot_data = {
+	.sect_sizes = &a5xx_snap_sizes,
+};
+
+static struct adreno_coresight_register a5xx_coresight_registers[] = {
+	{ A5XX_RBBM_CFG_DBGBUS_SEL_A },
+	{ A5XX_RBBM_CFG_DBGBUS_SEL_B },
+	{ A5XX_RBBM_CFG_DBGBUS_SEL_C },
+	{ A5XX_RBBM_CFG_DBGBUS_SEL_D },
+	{ A5XX_RBBM_CFG_DBGBUS_CNTLT },
+	{ A5XX_RBBM_CFG_DBGBUS_CNTLM },
+	{ A5XX_RBBM_CFG_DBGBUS_OPL },
+	{ A5XX_RBBM_CFG_DBGBUS_OPE },
+	{ A5XX_RBBM_CFG_DBGBUS_IVTL_0 },
+	{ A5XX_RBBM_CFG_DBGBUS_IVTL_1 },
+	{ A5XX_RBBM_CFG_DBGBUS_IVTL_2 },
+	{ A5XX_RBBM_CFG_DBGBUS_IVTL_3 },
+	{ A5XX_RBBM_CFG_DBGBUS_MASKL_0 },
+	{ A5XX_RBBM_CFG_DBGBUS_MASKL_1 },
+	{ A5XX_RBBM_CFG_DBGBUS_MASKL_2 },
+	{ A5XX_RBBM_CFG_DBGBUS_MASKL_3 },
+	{ A5XX_RBBM_CFG_DBGBUS_BYTEL_0 },
+	{ A5XX_RBBM_CFG_DBGBUS_BYTEL_1 },
+	{ A5XX_RBBM_CFG_DBGBUS_IVTE_0 },
+	{ A5XX_RBBM_CFG_DBGBUS_IVTE_1 },
+	{ A5XX_RBBM_CFG_DBGBUS_IVTE_2 },
+	{ A5XX_RBBM_CFG_DBGBUS_IVTE_3 },
+	{ A5XX_RBBM_CFG_DBGBUS_MASKE_0 },
+	{ A5XX_RBBM_CFG_DBGBUS_MASKE_1 },
+	{ A5XX_RBBM_CFG_DBGBUS_MASKE_2 },
+	{ A5XX_RBBM_CFG_DBGBUS_MASKE_3 },
+	{ A5XX_RBBM_CFG_DBGBUS_NIBBLEE },
+	{ A5XX_RBBM_CFG_DBGBUS_PTRC0 },
+	{ A5XX_RBBM_CFG_DBGBUS_PTRC1 },
+	{ A5XX_RBBM_CFG_DBGBUS_LOADREG },
+	{ A5XX_RBBM_CFG_DBGBUS_IDX },
+	{ A5XX_RBBM_CFG_DBGBUS_CLRC },
+	{ A5XX_RBBM_CFG_DBGBUS_LOADIVT },
+	{ A5XX_RBBM_CFG_DBGBUS_EVENT_LOGIC },
+	{ A5XX_RBBM_CFG_DBGBUS_OVER },
+	{ A5XX_RBBM_CFG_DBGBUS_COUNT0 },
+	{ A5XX_RBBM_CFG_DBGBUS_COUNT1 },
+	{ A5XX_RBBM_CFG_DBGBUS_COUNT2 },
+	{ A5XX_RBBM_CFG_DBGBUS_COUNT3 },
+	{ A5XX_RBBM_CFG_DBGBUS_COUNT4 },
+	{ A5XX_RBBM_CFG_DBGBUS_COUNT5 },
+	{ A5XX_RBBM_CFG_DBGBUS_TRACE_ADDR },
+	{ A5XX_RBBM_CFG_DBGBUS_TRACE_BUF0 },
+	{ A5XX_RBBM_CFG_DBGBUS_TRACE_BUF1 },
+	{ A5XX_RBBM_CFG_DBGBUS_TRACE_BUF2 },
+	{ A5XX_RBBM_CFG_DBGBUS_TRACE_BUF3 },
+	{ A5XX_RBBM_CFG_DBGBUS_TRACE_BUF4 },
+	{ A5XX_RBBM_CFG_DBGBUS_MISR0 },
+	{ A5XX_RBBM_CFG_DBGBUS_MISR1 },
+	{ A5XX_RBBM_AHB_DBG_CNTL },
+	{ A5XX_RBBM_READ_AHB_THROUGH_DBG },
+	{ A5XX_RBBM_DBG_LO_HI_GPIO },
+	{ A5XX_RBBM_EXT_TRACE_BUS_CNTL },
+	{ A5XX_RBBM_EXT_VBIF_DBG_CNTL },
+};
+
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_a, &a5xx_coresight_registers[0]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_b, &a5xx_coresight_registers[1]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_c, &a5xx_coresight_registers[2]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_sel_d, &a5xx_coresight_registers[3]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_cntlt, &a5xx_coresight_registers[4]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_cntlm, &a5xx_coresight_registers[5]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_opl, &a5xx_coresight_registers[6]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ope, &a5xx_coresight_registers[7]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_0, &a5xx_coresight_registers[8]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_1, &a5xx_coresight_registers[9]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_2, &a5xx_coresight_registers[10]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivtl_3, &a5xx_coresight_registers[11]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_0, &a5xx_coresight_registers[12]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_1, &a5xx_coresight_registers[13]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_2, &a5xx_coresight_registers[14]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maskl_3, &a5xx_coresight_registers[15]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_bytel_0, &a5xx_coresight_registers[16]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_bytel_1, &a5xx_coresight_registers[17]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_0, &a5xx_coresight_registers[18]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_1, &a5xx_coresight_registers[19]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_2, &a5xx_coresight_registers[20]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ivte_3, &a5xx_coresight_registers[21]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_0, &a5xx_coresight_registers[22]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_1, &a5xx_coresight_registers[23]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_2, &a5xx_coresight_registers[24]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_maske_3, &a5xx_coresight_registers[25]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_nibblee, &a5xx_coresight_registers[26]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ptrc0, &a5xx_coresight_registers[27]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_ptrc1, &a5xx_coresight_registers[28]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_loadreg, &a5xx_coresight_registers[29]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_idx, &a5xx_coresight_registers[30]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_clrc, &a5xx_coresight_registers[31]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_loadivt, &a5xx_coresight_registers[32]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_event_logic,
+				&a5xx_coresight_registers[33]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_over, &a5xx_coresight_registers[34]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count0, &a5xx_coresight_registers[35]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count1, &a5xx_coresight_registers[36]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count2, &a5xx_coresight_registers[37]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count3, &a5xx_coresight_registers[38]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count4, &a5xx_coresight_registers[39]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_count5, &a5xx_coresight_registers[40]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_addr,
+				&a5xx_coresight_registers[41]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf0,
+				&a5xx_coresight_registers[42]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf1,
+				&a5xx_coresight_registers[43]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf2,
+				&a5xx_coresight_registers[44]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf3,
+				&a5xx_coresight_registers[45]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_trace_buf4,
+				&a5xx_coresight_registers[46]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_misr0, &a5xx_coresight_registers[47]);
+static ADRENO_CORESIGHT_ATTR(cfg_dbgbus_misr1, &a5xx_coresight_registers[48]);
+static ADRENO_CORESIGHT_ATTR(ahb_dbg_cntl, &a5xx_coresight_registers[49]);
+static ADRENO_CORESIGHT_ATTR(read_ahb_through_dbg,
+				&a5xx_coresight_registers[50]);
+static ADRENO_CORESIGHT_ATTR(dbg_lo_hi_gpio, &a5xx_coresight_registers[51]);
+static ADRENO_CORESIGHT_ATTR(ext_trace_bus_cntl, &a5xx_coresight_registers[52]);
+static ADRENO_CORESIGHT_ATTR(ext_vbif_dbg_cntl, &a5xx_coresight_registers[53]);
+
+static struct attribute *a5xx_coresight_attrs[] = {
+	&coresight_attr_cfg_dbgbus_sel_a.attr.attr,
+	&coresight_attr_cfg_dbgbus_sel_b.attr.attr,
+	&coresight_attr_cfg_dbgbus_sel_c.attr.attr,
+	&coresight_attr_cfg_dbgbus_sel_d.attr.attr,
+	&coresight_attr_cfg_dbgbus_cntlt.attr.attr,
+	&coresight_attr_cfg_dbgbus_cntlm.attr.attr,
+	&coresight_attr_cfg_dbgbus_opl.attr.attr,
+	&coresight_attr_cfg_dbgbus_ope.attr.attr,
+	&coresight_attr_cfg_dbgbus_ivtl_0.attr.attr,
+	&coresight_attr_cfg_dbgbus_ivtl_1.attr.attr,
+	&coresight_attr_cfg_dbgbus_ivtl_2.attr.attr,
+	&coresight_attr_cfg_dbgbus_ivtl_3.attr.attr,
+	&coresight_attr_cfg_dbgbus_maskl_0.attr.attr,
+	&coresight_attr_cfg_dbgbus_maskl_1.attr.attr,
+	&coresight_attr_cfg_dbgbus_maskl_2.attr.attr,
+	&coresight_attr_cfg_dbgbus_maskl_3.attr.attr,
+	&coresight_attr_cfg_dbgbus_bytel_0.attr.attr,
+	&coresight_attr_cfg_dbgbus_bytel_1.attr.attr,
+	&coresight_attr_cfg_dbgbus_ivte_0.attr.attr,
+	&coresight_attr_cfg_dbgbus_ivte_1.attr.attr,
+	&coresight_attr_cfg_dbgbus_ivte_2.attr.attr,
+	&coresight_attr_cfg_dbgbus_ivte_3.attr.attr,
+	&coresight_attr_cfg_dbgbus_maske_0.attr.attr,
+	&coresight_attr_cfg_dbgbus_maske_1.attr.attr,
+	&coresight_attr_cfg_dbgbus_maske_2.attr.attr,
+	&coresight_attr_cfg_dbgbus_maske_3.attr.attr,
+	&coresight_attr_cfg_dbgbus_nibblee.attr.attr,
+	&coresight_attr_cfg_dbgbus_ptrc0.attr.attr,
+	&coresight_attr_cfg_dbgbus_ptrc1.attr.attr,
+	&coresight_attr_cfg_dbgbus_loadreg.attr.attr,
+	&coresight_attr_cfg_dbgbus_idx.attr.attr,
+	&coresight_attr_cfg_dbgbus_clrc.attr.attr,
+	&coresight_attr_cfg_dbgbus_loadivt.attr.attr,
+	&coresight_attr_cfg_dbgbus_event_logic.attr.attr,
+	&coresight_attr_cfg_dbgbus_over.attr.attr,
+	&coresight_attr_cfg_dbgbus_count0.attr.attr,
+	&coresight_attr_cfg_dbgbus_count1.attr.attr,
+	&coresight_attr_cfg_dbgbus_count2.attr.attr,
+	&coresight_attr_cfg_dbgbus_count3.attr.attr,
+	&coresight_attr_cfg_dbgbus_count4.attr.attr,
+	&coresight_attr_cfg_dbgbus_count5.attr.attr,
+	&coresight_attr_cfg_dbgbus_trace_addr.attr.attr,
+	&coresight_attr_cfg_dbgbus_trace_buf0.attr.attr,
+	&coresight_attr_cfg_dbgbus_trace_buf1.attr.attr,
+	&coresight_attr_cfg_dbgbus_trace_buf2.attr.attr,
+	&coresight_attr_cfg_dbgbus_trace_buf3.attr.attr,
+	&coresight_attr_cfg_dbgbus_trace_buf4.attr.attr,
+	&coresight_attr_cfg_dbgbus_misr0.attr.attr,
+	&coresight_attr_cfg_dbgbus_misr1.attr.attr,
+	&coresight_attr_ahb_dbg_cntl.attr.attr,
+	&coresight_attr_read_ahb_through_dbg.attr.attr,
+	&coresight_attr_dbg_lo_hi_gpio.attr.attr,
+	&coresight_attr_ext_trace_bus_cntl.attr.attr,
+	&coresight_attr_ext_vbif_dbg_cntl.attr.attr,
+	NULL,
+};
+
+static const struct attribute_group a5xx_coresight_group = {
+	.attrs = a5xx_coresight_attrs,
+};
+
+static const struct attribute_group *a5xx_coresight_groups[] = {
+	&a5xx_coresight_group,
+	NULL,
+};
+
+static struct adreno_coresight a5xx_coresight = {
+	.registers = a5xx_coresight_registers,
+	.count = ARRAY_SIZE(a5xx_coresight_registers),
+	.groups = a5xx_coresight_groups,
+};
+
+struct adreno_gpudev adreno_a5xx_gpudev = {
+	.reg_offsets = &a5xx_reg_offsets,
+	.int_bits = a5xx_int_bits,
+	.ft_perf_counters = a5xx_ft_perf_counters,
+	.ft_perf_counters_count = ARRAY_SIZE(a5xx_ft_perf_counters),
+	.coresight = &a5xx_coresight,
+	.start = a5xx_start,
+	.snapshot = a5xx_snapshot,
+	.irq = &a5xx_irq,
+	.snapshot_data = &a5xx_snapshot_data,
+	.irq_trace = trace_kgsl_a5xx_irq_status,
+	.num_prio_levels = KGSL_PRIORITY_MAX_RB_LEVELS,
+	.platform_setup = a5xx_platform_setup,
+	.init = a5xx_init,
+	.remove = a5xx_remove,
+	.rb_start = a5xx_rb_start,
+	.microcode_read = a5xx_microcode_read,
+	.perfcounters = &a5xx_perfcounters,
+	.vbif_xin_halt_ctrl0_mask = A5XX_VBIF_XIN_HALT_CTRL0_MASK,
+	.is_sptp_idle = a5xx_is_sptp_idle,
+	.regulator_enable = a5xx_regulator_enable,
+	.regulator_disable = a5xx_regulator_disable,
+	.pwrlevel_change_settings = a5xx_pwrlevel_change_settings,
+	.read_throttling_counters = a5xx_read_throttling_counters,
+	.count_throttles = a5xx_count_throttles,
+	.enable_pwr_counters = a5xx_enable_pwr_counters,
+	.preemption_pre_ibsubmit = a5xx_preemption_pre_ibsubmit,
+	.preemption_yield_enable =
+				a5xx_preemption_yield_enable,
+	.preemption_post_ibsubmit =
+			a5xx_preemption_post_ibsubmit,
+	.preemption_init = a5xx_preemption_init,
+	.preemption_schedule = a5xx_preemption_schedule,
+	.enable_64bit = a5xx_enable_64bit,
+	.clk_set_options = a5xx_clk_set_options,
+};
diff --git a/drivers/gpu/msm/adreno_a5xx.h b/drivers/gpu/msm/adreno_a5xx.h
new file mode 100644
index 0000000..08fd16a
--- /dev/null
+++ b/drivers/gpu/msm/adreno_a5xx.h
@@ -0,0 +1,259 @@
+/* Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _ADRENO_A5XX_H_
+#define _ADRENO_A5XX_H_
+
+#include "a5xx_reg.h"
+
+#define A5XX_IRQ_FLAGS \
+	{ BIT(A5XX_INT_RBBM_GPU_IDLE), "RBBM_GPU_IDLE" }, \
+	{ BIT(A5XX_INT_RBBM_AHB_ERROR), "RBBM_AHB_ERR" }, \
+	{ BIT(A5XX_INT_RBBM_TRANSFER_TIMEOUT), "RBBM_TRANSFER_TIMEOUT" }, \
+	{ BIT(A5XX_INT_RBBM_ME_MS_TIMEOUT), "RBBM_ME_MS_TIMEOUT" }, \
+	{ BIT(A5XX_INT_RBBM_PFP_MS_TIMEOUT), "RBBM_PFP_MS_TIMEOUT" }, \
+	{ BIT(A5XX_INT_RBBM_ETS_MS_TIMEOUT), "RBBM_ETS_MS_TIMEOUT" }, \
+	{ BIT(A5XX_INT_RBBM_ATB_ASYNC_OVERFLOW), "RBBM_ATB_ASYNC_OVERFLOW" }, \
+	{ BIT(A5XX_INT_RBBM_GPC_ERROR), "RBBM_GPC_ERR" }, \
+	{ BIT(A5XX_INT_CP_SW), "CP_SW" }, \
+	{ BIT(A5XX_INT_CP_HW_ERROR), "CP_OPCODE_ERROR" }, \
+	{ BIT(A5XX_INT_CP_CCU_FLUSH_DEPTH_TS), "CP_CCU_FLUSH_DEPTH_TS" }, \
+	{ BIT(A5XX_INT_CP_CCU_FLUSH_COLOR_TS), "CP_CCU_FLUSH_COLOR_TS" }, \
+	{ BIT(A5XX_INT_CP_CCU_RESOLVE_TS), "CP_CCU_RESOLVE_TS" }, \
+	{ BIT(A5XX_INT_CP_IB2), "CP_IB2_INT" }, \
+	{ BIT(A5XX_INT_CP_IB1), "CP_IB1_INT" }, \
+	{ BIT(A5XX_INT_CP_RB), "CP_RB_INT" }, \
+	{ BIT(A5XX_INT_CP_UNUSED_1), "CP_UNUSED_1" }, \
+	{ BIT(A5XX_INT_CP_RB_DONE_TS), "CP_RB_DONE_TS" }, \
+	{ BIT(A5XX_INT_CP_WT_DONE_TS), "CP_WT_DONE_TS" }, \
+	{ BIT(A5XX_INT_UNKNOWN_1), "UNKNOWN_1" }, \
+	{ BIT(A5XX_INT_CP_CACHE_FLUSH_TS), "CP_CACHE_FLUSH_TS" }, \
+	{ BIT(A5XX_INT_UNUSED_2), "UNUSED_2" }, \
+	{ BIT(A5XX_INT_RBBM_ATB_BUS_OVERFLOW), "RBBM_ATB_BUS_OVERFLOW" }, \
+	{ BIT(A5XX_INT_MISC_HANG_DETECT), "MISC_HANG_DETECT" }, \
+	{ BIT(A5XX_INT_UCHE_OOB_ACCESS), "UCHE_OOB_ACCESS" }, \
+	{ BIT(A5XX_INT_UCHE_TRAP_INTR), "UCHE_TRAP_INTR" }, \
+	{ BIT(A5XX_INT_DEBBUS_INTR_0), "DEBBUS_INTR_0" }, \
+	{ BIT(A5XX_INT_DEBBUS_INTR_1), "DEBBUS_INTR_1" }, \
+	{ BIT(A5XX_INT_GPMU_VOLTAGE_DROOP), "GPMU_VOLTAGE_DROOP" }, \
+	{ BIT(A5XX_INT_GPMU_FIRMWARE), "GPMU_FIRMWARE" }, \
+	{ BIT(A5XX_INT_ISDB_CPU_IRQ), "ISDB_CPU_IRQ" }, \
+	{ BIT(A5XX_INT_ISDB_UNDER_DEBUG), "ISDB_UNDER_DEBUG" }
+
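+/*
+ * Editor's illustrative sketch, not part of the msm-4.4 snapshot: the
+ * { mask, "name" } pairs above are laid out for bitwise flag decoding
+ * (e.g. by trace helpers).  The function below shows one hypothetical way
+ * to walk such a table, assuming the usual kernel helpers (BIT, ARRAY_SIZE,
+ * pr_debug) are visible to the including file.
+ */
+struct a5xx_example_irq_flag {
+	unsigned long mask;
+	const char *name;
+};
+
+static inline void a5xx_example_log_irq_status(unsigned int status)
+{
+	static const struct a5xx_example_irq_flag flags[] = { A5XX_IRQ_FLAGS };
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(flags); i++)
+		if (status & flags[i].mask)
+			pr_debug("a5xx irq: %s\n", flags[i].name);
+}
+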
+#define A5XX_CP_CTXRECORD_MAGIC_REF     0x27C4BAFCUL
+/* Size of each CP preemption record */
+#define A5XX_CP_CTXRECORD_SIZE_IN_BYTES     0x10000
+/* Size of the preemption counter block (in bytes) */
+#define A5XX_CP_CTXRECORD_PREEMPTION_COUNTER_SIZE   (16 * 4)
+
+/**
+ * struct a5xx_cp_preemption_record - CP context record for
+ * preemption.
+ * @magic: (00) Value at this offset must be equal to
+ * A5XX_CP_CTXRECORD_MAGIC_REF.
+ * @info: (04) Type of record. Written (usually non-zero) by the CP;
+ * the driver must set this to zero for all ringbuffers.
+ * @data: (08) DATA field in SET_RENDER_MODE or checkpoint packets.
+ * Written by the CP when switching out. Not used on switch-in; the
+ * driver must initialize it to zero.
+ * @cntl: (12) RB_CNTL, saved and restored by CP.
+ * @rptr: (16) RB_RPTR, saved and restored by CP.
+ * @wptr: (20) RB_WPTR, saved and restored by CP.
+ * @rptr_addr: (24) RB_RPTR_ADDR_LO|HI saved and restored.
+ * @rbase: (32) RB_BASE_LO|HI saved and restored.
+ * @counter: (40) Pointer to the preemption counter block.
+ */
+struct a5xx_cp_preemption_record {
+	uint32_t  magic;
+	uint32_t  info;
+	uint32_t  data;
+	uint32_t  cntl;
+	uint32_t  rptr;
+	uint32_t  wptr;
+	uint64_t  rptr_addr;
+	uint64_t  rbase;
+	uint64_t  counter;
+};
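+
+/*
+ * Editor's illustrative sketch, not part of the msm-4.4 snapshot: per the
+ * field descriptions above, the host seeds each ringbuffer's record with the
+ * magic value and zeroed info/data before the CP ever switches to it.  The
+ * GPU addresses passed in are hypothetical parameters, and the real driver
+ * programs cntl with its ringbuffer control value rather than zero.
+ */
+static inline void a5xx_example_init_ctxrecord(
+		struct a5xx_cp_preemption_record *rec,
+		uint64_t rptr_gpuaddr, uint64_t rb_gpuaddr,
+		uint64_t counter_gpuaddr)
+{
+	rec->magic = A5XX_CP_CTXRECORD_MAGIC_REF;
+	rec->info = 0;		/* owned by the CP; must start at zero */
+	rec->data = 0;		/* likewise written by the CP on switch-out */
+	rec->cntl = 0;
+	rec->rptr = 0;
+	rec->wptr = 0;
+	rec->rptr_addr = rptr_gpuaddr;
+	rec->rbase = rb_gpuaddr;
+	rec->counter = counter_gpuaddr;
+}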
+
+#define A5XX_CP_SMMU_INFO_MAGIC_REF     0x3618CDA3UL
+
+/**
+ * struct a5xx_cp_smmu_info - CP preemption SMMU info.
+ * @magic: (00) The value at this offset must be equal to
+ * A5XX_CP_SMMU_INFO_MAGIC_REF.
+ * @_pad4: (04) Reserved/padding
+ * @ttbr0: (08) Base address of the page table for the
+ * incoming context.
+ * @asid: (16) Address Space Identifier (ASID) of the incoming context.
+ * @context_idr: (20) Context Identification Register value.
+ */
+struct a5xx_cp_smmu_info {
+	uint32_t  magic;
+	uint32_t  _pad4;
+	uint64_t  ttbr0;
+	uint32_t  asid;
+	uint32_t  context_idr;
+};
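+
+/*
+ * Editor's illustrative sketch, not part of the msm-4.4 snapshot: the SMMU
+ * info block is likewise seeded by the host with the magic value plus the
+ * pagetable base and IDs of the incoming context.  All three value
+ * parameters here are hypothetical.
+ */
+static inline void a5xx_example_init_smmu_info(struct a5xx_cp_smmu_info *info,
+		uint64_t ttbr0, uint32_t asid, uint32_t context_idr)
+{
+	info->magic = A5XX_CP_SMMU_INFO_MAGIC_REF;
+	info->_pad4 = 0;
+	info->ttbr0 = ttbr0;
+	info->asid = asid;
+	info->context_idr = context_idr;
+}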
+
+void a5xx_snapshot(struct adreno_device *adreno_dev,
+		struct kgsl_snapshot *snapshot);
+unsigned int a5xx_num_registers(void);
+
+void a5xx_crashdump_init(struct adreno_device *adreno_dev);
+
+void a5xx_hwcg_set(struct adreno_device *adreno_dev, bool on);
+
+#define A5XX_CP_RB_CNTL_DEFAULT (((ilog2(4) << 8) & 0x1F00) | \
+		(ilog2(KGSL_RB_DWORDS >> 1) & 0x3F))
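+/*
+ * Editor's note (worked example): ilog2(4) = 2, so the bits-[12:8] field
+ * above evaluates to 0x200; if KGSL_RB_DWORDS were, say, 8192, the
+ * bits-[5:0] field would be ilog2(4096) = 0xC, giving 0x20C overall.
+ */
+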
+/* GPMU interrupt multiplexor */
+#define FW_INTR_INFO			(0)
+#define LLM_ACK_ERR_INTR		(1)
+#define ISENS_TRIM_ERR_INTR		(2)
+#define ISENS_ERR_INTR			(3)
+#define ISENS_IDLE_ERR_INTR		(4)
+#define ISENS_PWR_ON_ERR_INTR		(5)
+#define WDOG_EXPITED			(31)
+
+#define VALID_GPMU_IRQ (\
+	BIT(FW_INTR_INFO) | \
+	BIT(LLM_ACK_ERR_INTR) | \
+	BIT(ISENS_TRIM_ERR_INTR) | \
+	BIT(ISENS_ERR_INTR) | \
+	BIT(ISENS_IDLE_ERR_INTR) | \
+	BIT(ISENS_PWR_ON_ERR_INTR) | \
+	BIT(WDOG_EXPITED))
+
+/* A5XX_GPMU_GPMU_LLM_GLM_SLEEP_CTRL */
+#define STATE_OF_CHILD			GENMASK(5, 4)
+#define STATE_OF_CHILD_01		BIT(4)
+#define STATE_OF_CHILD_11		(BIT(4) | BIT(5))
+#define IDLE_FULL_LM_SLEEP		BIT(0)
+
+/* A5XX_GPMU_GPMU_LLM_GLM_SLEEP_STATUS */
+#define WAKEUP_ACK			BIT(1)
+#define IDLE_FULL_ACK			BIT(0)
+
+/* A5XX_GPMU_GPMU_ISENSE_CTRL */
+#define	ISENSE_CGC_EN_DISABLE		BIT(0)
+
+/* A5XX_GPMU_TEMP_SENSOR_CONFIG */
+#define GPMU_BCL_ENABLED		BIT(4)
+#define GPMU_LLM_ENABLED		BIT(9)
+#define GPMU_ISENSE_STATUS		GENMASK(3, 0)
+#define GPMU_ISENSE_END_POINT_CAL_ERR	BIT(0)
+
+/* A5XX_GPU_CS_AMP_CALIBRATION_CONTROL1 */
+#define AMP_SW_TRIM_START		BIT(0)
+
+/* A5XX_GPU_CS_SENSOR_GENERAL_STATUS */
+#define SS_AMPTRIM_DONE			BIT(11)
+#define CS_PWR_ON_STATUS		BIT(10)
+
+/* A5XX_GPU_CS_AMP_CALIBRATION_STATUS*_* */
+#define AMP_OUT_OF_RANGE_ERR		BIT(4)
+#define AMP_OFFSET_CHECK_MAX_ERR	BIT(2)
+#define AMP_OFFSET_CHECK_MIN_ERR	BIT(1)
+
+/* A5XX_GPU_CS_AMP_CALIBRATION_DONE */
+#define SW_OPAMP_CAL_DONE           BIT(0)
+
+#define AMP_CALIBRATION_ERR (AMP_OFFSET_CHECK_MIN_ERR | \
+		AMP_OFFSET_CHECK_MAX_ERR | AMP_OUT_OF_RANGE_ERR)
+
+#define AMP_CALIBRATION_RETRY_CNT	3
+#define AMP_CALIBRATION_TIMEOUT		6
+
+/* A5XX_GPMU_GPMU_VOLTAGE_INTR_EN_MASK */
+#define VOLTAGE_INTR_EN			BIT(0)
+
+/* A5XX_GPMU_GPMU_PWR_THRESHOLD */
+#define PWR_THRESHOLD_VALID		0x80000000
+
+/* A5XX_GPMU_GPMU_SP_CLOCK_CONTROL */
+#define CNTL_IP_CLK_ENABLE		BIT(0)
+/* AGC */
+#define AGC_INIT_BASE			A5XX_GPMU_DATA_RAM_BASE
+#define AGC_INIT_MSG_MAGIC		(AGC_INIT_BASE + 5)
+#define AGC_MSG_BASE			(AGC_INIT_BASE + 7)
+
+#define AGC_MSG_STATE			(AGC_MSG_BASE + 0)
+#define AGC_MSG_COMMAND			(AGC_MSG_BASE + 1)
+#define AGC_MSG_PAYLOAD_SIZE		(AGC_MSG_BASE + 3)
+#define AGC_MSG_PAYLOAD			(AGC_MSG_BASE + 5)
+
+#define AGC_INIT_MSG_VALUE		0xBABEFACE
+#define AGC_POWER_CONFIG_PRODUCTION_ID	1
+
+#define AGC_LM_CONFIG			(136/4)
+#define AGC_LM_CONFIG_ENABLE_GPMU_ADAPTIVE (1)
+
+#define AGC_LM_CONFIG_ENABLE_ERROR	(3 << 4)
+#define AGC_LM_CONFIG_ISENSE_ENABLE     (1 << 4)
+
+#define AGC_THROTTLE_SEL_DCS		(1 << 8)
+#define AGC_THROTTLE_DISABLE            (2 << 8)
+
+#define AGC_LLM_ENABLED			(1 << 16)
+#define	AGC_GPU_VERSION_MASK		GENMASK(18, 17)
+#define AGC_GPU_VERSION_SHIFT		17
+#define AGC_BCL_DISABLED		(1 << 24)
+
+#define AGC_LEVEL_CONFIG		(140/4)
+
+#define LM_DCVS_LIMIT			1
+/* FW file tags */
+#define GPMU_FIRMWARE_ID		2
+#define GPMU_SEQUENCE_ID		3
+#define GPMU_INST_RAM_SIZE		0xFFF
+
+#define HEADER_MAJOR			1
+#define HEADER_MINOR			2
+#define HEADER_DATE			3
+#define HEADER_TIME			4
+#define HEADER_SEQUENCE			5
+
+#define MAX_HEADER_SIZE			10
+
+#define LM_SEQUENCE_ID			1
+#define MAX_SEQUENCE_ID			3
+
+#define GPMU_ISENSE_SAVE	(A5XX_GPMU_DATA_RAM_BASE + 200/4)
+/* LM defaults */
+#define LM_DEFAULT_LIMIT		6000
+#define A530_DEFAULT_LEAKAGE		0x004E001A
+
+static inline bool lm_on(struct adreno_device *adreno_dev)
+{
+	return ADRENO_FEATURE(adreno_dev, ADRENO_LM) &&
+		test_bit(ADRENO_LM_CTRL, &adreno_dev->pwrctrl_flag);
+}
+
+/* Preemption functions */
+void a5xx_preemption_trigger(struct adreno_device *adreno_dev);
+void a5xx_preemption_schedule(struct adreno_device *adreno_dev);
+void a5xx_preemption_start(struct adreno_device *adreno_dev);
+int a5xx_preemption_init(struct adreno_device *adreno_dev);
+int a5xx_preemption_yield_enable(unsigned int *cmds);
+
+unsigned int a5xx_preemption_post_ibsubmit(struct adreno_device *adreno_dev,
+		unsigned int *cmds);
+unsigned int a5xx_preemption_pre_ibsubmit(
+			struct adreno_device *adreno_dev,
+			struct adreno_ringbuffer *rb,
+			unsigned int *cmds, struct kgsl_context *context);
+
+
+void a5xx_preempt_callback(struct adreno_device *adreno_dev, int bit);
+
+#endif
diff --git a/drivers/gpu/msm/adreno_a5xx_packets.h b/drivers/gpu/msm/adreno_a5xx_packets.h
new file mode 100644
index 0000000..0029dd6
--- /dev/null
+++ b/drivers/gpu/msm/adreno_a5xx_packets.h
@@ -0,0 +1,1402 @@
+/* Copyright (c) 2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+enum adreno_critical_fixup_buffers {
+	CRITICAL_PACKET0 = 0,
+	CRITICAL_PACKET1,
+	CRITICAL_PACKET2,
+	CRITICAL_PACKET3,
+	CRITICAL_PACKET_MAX,
+};
+
+struct adreno_critical_fixup {
+	unsigned int lo_offset;
+	unsigned int hi_offset;
+	enum adreno_critical_fixup_buffers buffer;
+	uint64_t mem_offset;
+};
+
+static unsigned int _a5xx_critical_pkts[] = {
+	0x400E0601, /* [0x0000] == TYPE4 == */
+	0x00000002, /* [0x0001] A5X_HLSQ_MODE_CNTL (0x0E06)*/
+	0x40E78A01, /* [0x0002] == TYPE4 == */
+	0x000FFFFF, /* [0x0003] A5X_HLSQ_UPDATE_CNTL_CTX_0 (0xE78A)*/
+	0x48E78401, /* [0x0004] == TYPE4 == */
+	0x00000005, /* [0x0005] A5X_HLSQ_CNTL_0_CTX_0 (0xE784)*/
+	0x40E78501, /* [0x0006] == TYPE4 == */
+	0x00000009, /* [0x0007] A5X_HLSQ_CNTL_1_CTX_0 (0xE785)*/
+	0x48E78B85, /* [0x0008] == TYPE4 == */
+	0x00000001, /* [0x0009] A5X_HLSQ_VS_CONFIG_CTX_0 (0xE78B)*/
+	0x00002085, /* [0x000A] A5X_HLSQ_PS_CONFIG_CTX_0 (0xE78C)*/
+	0x00002084, /* [0x000B] A5X_HLSQ_HS_CONFIG_CTX_0 (0xE78D)*/
+	0x00002084, /* [0x000C] A5X_HLSQ_DS_CONFIG_CTX_0 (0xE78E)*/
+	0x00002084, /* [0x000D] A5X_HLSQ_GS_CONFIG_CTX_0 (0xE78F)*/
+	0x40E58485, /* [0x000E] == TYPE4 == */
+	0x00000001, /* [0x000F] A5X_SP_VS_CONFIG_CTX_0 (0xE584)*/
+	0x00002085, /* [0x0010] A5X_SP_PS_CONFIG_CTX_0 (0xE585)*/
+	0x00002084, /* [0x0011] A5X_SP_HS_CONFIG_CTX_0 (0xE586)*/
+	0x00002084, /* [0x0012] A5X_SP_DS_CONFIG_CTX_0 (0xE587)*/
+	0x00002084, /* [0x0013] A5X_SP_GS_CONFIG_CTX_0 (0xE588)*/
+	0x40E79101, /* [0x0014] == TYPE4 == */
+	0x00000004, /* [0x0015] A5X_HLSQ_VS_CNTL_CTX_0 (0xE791)*/
+	0x40E79201, /* [0x0016] == TYPE4 == */
+	0x00000002, /* [0x0017] A5X_HLSQ_PS_CNTL_CTX_0 (0xE792)*/
+	0x48E58001, /* [0x0018] == TYPE4 == */
+	0x00000010, /* [0x0019] A5X_SP_SP_CNTL_CTX_0 (0xE580)*/
+	0x70B00043, /* [0x001A] == TYPE7: LOAD_STATE (30) == */
+	0x00A00000, /* [0x001B] */
+	0x00000000, /* [0x001C] */
+	0x00000000, /* [0x001D] */
+	0x20020003, /* [0x001E] */
+	0x56D81803, /* [0x001F] */
+	0x00000003, /* [0x0020] */
+	0x20150000, /* [0x0021] */
+	0x00000000, /* [0x0022] */
+	0x00000200, /* [0x0023] */
+	0x00000000, /* [0x0024] */
+	0x201100F4, /* [0x0025] */
+	0x00000000, /* [0x0026] */
+	0x00000500, /* [0x0027] */
+	0x00000C21, /* [0x0028] */
+	0x20154004, /* [0x0029] */
+	0x00000C20, /* [0x002A] */
+	0x20154003, /* [0x002B] */
+	0x00000C23, /* [0x002C] */
+	0x20154008, /* [0x002D] */
+	0x00000C22, /* [0x002E] */
+	0x20156007, /* [0x002F] */
+	0x00000000, /* [0x0030] */
+	0x20554005, /* [0x0031] */
+	0x3F800000, /* [0x0032] */
+	0x20554006, /* [0x0033] */
+	0x00000000, /* [0x0034] */
+	0x03000000, /* [0x0035] */
+	0x20050000, /* [0x0036] */
+	0x46F00009, /* [0x0037] */
+	0x201F0000, /* [0x0038] */
+	0x4398000A, /* [0x0039] */
+	0x201F0009, /* [0x003A] */
+	0x43980809, /* [0x003B] */
+	0x20180009, /* [0x003C] */
+	0x46100809, /* [0x003D] */
+	0x00091014, /* [0x003E] */
+	0x62050009, /* [0x003F] */
+	0x00000000, /* [0x0040] */
+	0x00000500, /* [0x0041] */
+	0x04800006, /* [0x0042] */
+	0xC2C61300, /* [0x0043] */
+	0x0280000E, /* [0x0044] */
+	0xC2C61310, /* [0x0045] */
+	0x00000000, /* [0x0046] */
+	0x04800000, /* [0x0047] */
+	0x00000000, /* [0x0048] */
+	0x05000000, /* [0x0049] */
+	0x00000000, /* [0x004A] */
+	0x00000000, /* [0x004B] */
+	0x00000000, /* [0x004C] */
+	0x00000000, /* [0x004D] */
+	0x00000000, /* [0x004E] */
+	0x00000000, /* [0x004F] */
+	0x00000000, /* [0x0050] */
+	0x00000000, /* [0x0051] */
+	0x00000000, /* [0x0052] */
+	0x00000000, /* [0x0053] */
+	0x00000000, /* [0x0054] */
+	0x00000000, /* [0x0055] */
+	0x00000000, /* [0x0056] */
+	0x00000000, /* [0x0057] */
+	0x00000000, /* [0x0058] */
+	0x00000000, /* [0x0059] */
+	0x00000000, /* [0x005A] */
+	0x00000000, /* [0x005B] */
+	0x00000000, /* [0x005C] */
+	0x00000000, /* [0x005D] */
+	0x70B00023, /* [0x005E] == TYPE7: LOAD_STATE (30) == */
+	0x00700000, /* [0x005F] */
+	0x00000000, /* [0x0060] */
+	0x00000000, /* [0x0061] */
+	0x00000000, /* [0x0062] */
+	0x03000000, /* [0x0063] */
+	0x00000000, /* [0x0064] */
+	0x00000000, /* [0x0065] */
+	0x00000000, /* [0x0066] */
+	0x00000000, /* [0x0067] */
+	0x00000000, /* [0x0068] */
+	0x00000000, /* [0x0069] */
+	0x00000000, /* [0x006A] */
+	0x00000000, /* [0x006B] */
+	0x00000000, /* [0x006C] */
+	0x00000000, /* [0x006D] */
+	0x00000000, /* [0x006E] */
+	0x00000000, /* [0x006F] */
+	0x00000000, /* [0x0070] */
+	0x00000000, /* [0x0071] */
+	0x00000000, /* [0x0072] */
+	0x00000000, /* [0x0073] */
+	0x00000000, /* [0x0074] */
+	0x00000000, /* [0x0075] */
+	0x00000000, /* [0x0076] */
+	0x00000000, /* [0x0077] */
+	0x00000000, /* [0x0078] */
+	0x00000000, /* [0x0079] */
+	0x00000000, /* [0x007A] */
+	0x00000000, /* [0x007B] */
+	0x00000000, /* [0x007C] */
+	0x00000000, /* [0x007D] */
+	0x00000000, /* [0x007E] */
+	0x00000000, /* [0x007F] */
+	0x00000000, /* [0x0080] */
+	0x00000000, /* [0x0081] */
+	0x70B08003, /* [0x0082] == TYPE7: LOAD_STATE (30) == */
+	0x00620000, /* [0x0083] */
+	0x00000000, /* [0x0084] */
+	0x00000000, /* [0x0085] */
+	0x70B08003, /* [0x0086] == TYPE7: LOAD_STATE (30) == */
+	0x01220008, /* [0x0087] */
+	0x00000000, /* [0x0088] */
+	0x00000000, /* [0x0089] */
+	0x70B0000B, /* [0x008A] == TYPE7: LOAD_STATE (30) == */
+	0x01180000, /* [0x008B] */
+	0x00000001, /* [0x008C] */
+	0x00000000, /* [0x008D] */
+	0x00000000, /* [0x008E] */
+	0x00000000, /* [0x008F] */
+	0x00000000, /* [0x0090] */
+	0x00000000, /* [0x0091] */
+	0x00000000, /* [0x0092] */
+	0x00000000, /* [0x0093] */
+	0x00000000, /* [0x0094] */
+	0x01400000, /* [0x0095] */
+	0x70460001, /* [0x0096] == TYPE7: EVENT_WRITE (46) == */
+	0x00000019, /* [0x0097] */
+	0x70460004, /* [0x0098] == TYPE7: EVENT_WRITE (46) == */
+	0x0000001D, /* [0x0099] */
+	0x00000000, /* [0x009A] */
+	0x00000000, /* [0x009B] */
+	0x00000001, /* [0x009C] */
+	0x70460004, /* [0x009D] == TYPE7: EVENT_WRITE (46) == */
+	0x0000001C, /* [0x009E] */
+	0x00000000, /* [0x009F] */
+	0x00000000, /* [0x00A0] */
+	0x00000001, /* [0x00A1] */
+	0x480E9185, /* [0x00A2] == TYPE4 == */
+	0x00000000, /* [0x00A3] A5X_UCHE_CACHE_INVALIDATE_MIN_LO (0x0E91)*/
+	0x00000000, /* [0x00A4] A5X_UCHE_CACHE_INVALIDATE_MIN_HI (0x0E92)*/
+	0x00000000, /* [0x00A5] A5X_UCHE_CACHE_INVALIDATE_MAX_LO (0x0E93)*/
+	0x00000000, /* [0x00A6] A5X_UCHE_CACHE_INVALIDATE_MAX_HI (0x0E94)*/
+	0x00000012, /* [0x00A7] A5X_UCHE_CACHE_INVALIDATE (0x0E95)*/
+	0x70268000, /* [0x00A8] == TYPE7: WAIT_FOR_IDLE (26) == */
+	0x40E78A01, /* [0x00A9] == TYPE4 == */
+	0x000FFFFF, /* [0x00AA] A5X_HLSQ_UPDATE_CNTL_CTX_0 (0xE78A)*/
+	0x70D08003, /* [0x00AB] == TYPE7: PERFCOUNTER_ACTION (50) == */
+	0x00000000, /* [0x00AC] */
+	0x00000000, /* [0x00AD] */
+	0x00000000, /* [0x00AE] */
+	0x70D08003, /* [0x00AF] == TYPE7: PERFCOUNTER_ACTION (50) == */
+	0x00000010, /* [0x00B0] */
+	0x00000000, /* [0x00B1] */
+	0x00000000, /* [0x00B2] */
+	0x70268000, /* [0x00B3] == TYPE7: WAIT_FOR_IDLE (26) == */
+	0x48E38C01, /* [0x00B4] == TYPE4 == */
+	0xFFFFFFFF, /* [0x00B5] A5X_PC_RESTART_INDEX_CTX_0 (0xE38C)*/
+	0x40E38801, /* [0x00B6] == TYPE4 == */
+	0x00000012, /* [0x00B7] A5X_PC_RASTER_CNTL_CTX_0 (0xE388)*/
+	0x48E09102, /* [0x00B8] == TYPE4 == */
+	0xFFC00010, /* [0x00B9] A5X_GRAS_SU_POINT_MINMAX_CTX_0 (0xE091)*/
+	0x00000008, /* [0x00BA] A5X_GRAS_SU_POINT_SIZE_CTX_0 (0xE092)*/
+	0x40E09901, /* [0x00BB] == TYPE4 == */
+	0x00000000, /* [0x00BC] A5X_GRAS_SU_CONSERVATIVE_RAS_CNTL_CTX_0 (0xE099)*/
+	0x48E0A401, /* [0x00BD] == TYPE4 == */
+	0x00000000, /* [0x00BE] A5X_GRAS_SC_SCREEN_SCISSOR_CNTL_CTX_0 (0xE0A4)*/
+	0x48E58A01, /* [0x00BF] == TYPE4 == */
+	0x00000000, /* [0x00C0] A5X_SP_VS_CONFIG_MAX_CONST_CTX_0 (0xE58A)*/
+	0x40E58B01, /* [0x00C1] == TYPE4 == */
+	0x00000000, /* [0x00C2] A5X_SP_PS_CONFIG_MAX_CONST_CTX_0 (0xE58B)*/
+	0x480CC601, /* [0x00C3] == TYPE4 == */
+	0x00000044, /* [0x00C4] A5X_RB_MODE_CNTL (0x0CC6)*/
+	0x400CC401, /* [0x00C5] == TYPE4 == */
+	0x00100000, /* [0x00C6] A5X_RB_DBG_ECO_CNTL (0x0CC4)*/
+	0x400E4201, /* [0x00C7] == TYPE4 == */
+	0x00000000, /* [0x00C8] A5X_VFD_MODE_CNTL (0x0E42)*/
+	0x480D0201, /* [0x00C9] == TYPE4 == */
+	0x0000001F, /* [0x00CA] A5X_PC_MODE_CNTL (0x0D02)*/
+	0x480EC201, /* [0x00CB] == TYPE4 == */
+	0x0000001E, /* [0x00CC] A5X_SP_MODE_CNTL (0x0EC2)*/
+	0x400EC001, /* [0x00CD] == TYPE4 == */
+	0x40000800, /* [0x00CE] A5X_SP_DBG_ECO_CNTL (0x0EC0)*/
+	0x400F0201, /* [0x00CF] == TYPE4 == */
+	0x00000544, /* [0x00D0] A5X_TPL1_MODE_CNTL (0x0F02)*/
+	0x400E0002, /* [0x00D1] == TYPE4 == */
+	0x00000080, /* [0x00D2] A5X_HLSQ_TIMEOUT_THRESHOLD_0 (0x0E00)*/
+	0x00000000, /* [0x00D3] A5X_HLSQ_TIMEOUT_THRESHOLD_1 (0x0E01)*/
+	0x400E6001, /* [0x00D4] == TYPE4 == */
+	0x00000400, /* [0x00D5] A5X_VPC_DBG_ECO_CNTL (0x0E60)*/
+	0x400E0601, /* [0x00D6] == TYPE4 == */
+	0x00000001, /* [0x00D7] A5X_HLSQ_MODE_CNTL (0x0E06)*/
+	0x480E6201, /* [0x00D8] == TYPE4 == */
+	0x00000000, /* [0x00D9] A5X_VPC_MODE_CNTL (0x0E62)*/
+	0x70EC8005, /* [0x00DA] == TYPE7: SET_RENDER_MODE (6C) == */
+	0x00000002, /* [0x00DB] */
+	0x00000000, /* [0x00DC] */
+	0x00000000, /* [0x00DD] */
+	0x00000008, /* [0x00DE] */
+	0x00000001, /* [0x00DF] */
+	0x40E14001, /* [0x00E0] == TYPE4 == */
+	0x00000204, /* [0x00E1] A5X_RB_CNTL_CTX_0 (0xE140)*/
+	0x709D0001, /* [0x00E2] == TYPE7: SKIP_IB2_ENABLE_GLOBAL (1D) == */
+	0x00000000, /* [0x00E3] */
+	0x48E0EA02, /* [0x00E4] == TYPE4 == */
+	0x00000000, /* [0x00E5] A5X_GRAS_SC_WINDOW_SCISSOR_TL_CTX_0 (0xE0EA)*/
+	0x001F0073, /* [0x00E6] A5X_GRAS_SC_WINDOW_SCISSOR_BR_CTX_0 (0xE0EB)*/
+	0x48E21102, /* [0x00E7] == TYPE4 == */
+	0x00000000, /* [0x00E8] A5X_RB_RESOLVE_CNTL_1_CTX_0 (0xE211)*/
+	0x00000000, /* [0x00E9] A5X_RB_RESOLVE_CNTL_2_CTX_0 (0xE212)*/
+	0x480BC283, /* [0x00EA] == TYPE4 == */
+	0x00000204, /* [0x00EB] UNKNOWN (0x0BC2)*/
+	0x00000000, /* [0x00EC] UNKNOWN (0x0BC3)*/
+	0x00000000, /* [0x00ED] UNKNOWN (0x0BC4)*/
+	0x400BC502, /* [0x00EE] == TYPE4 == */
+	0x00000000, /* [0x00EF] UNKNOWN (0x0BC5)*/
+	0x00000000, /* [0x00F0] UNKNOWN (0x0BC6)*/
+	0x480BD001, /* [0x00F1] == TYPE4 == */
+	0x01100000, /* [0x00F2] UNKNOWN (0x0BD0)*/
+	0x480BE002, /* [0x00F3] == TYPE4 == */
+	0x00000000, /* [0x00F4] UNKNOWN (0x0BE0)*/
+	0x00000000, /* [0x00F5] UNKNOWN (0x0BE1)*/
+	0x480C0001, /* [0x00F6] == TYPE4 == */
+	0x00000020, /* [0x00F7] A5X_VSC_PIPE_DATA_LENGTH_0 (0x0C00)*/
+	0x48E3B001, /* [0x00F8] == TYPE4 == */
+	0x00000003, /* [0x00F9] A5X_PC_POWER_CNTL_CTX_0 (0xE3B0)*/
+	0x48E4F001, /* [0x00FA] == TYPE4 == */
+	0x00000003, /* [0x00FB] A5X_VFD_POWER_CNTL_CTX_0 (0xE4F0)*/
+	0x480E6201, /* [0x00FC] == TYPE4 == */
+	0x00000001, /* [0x00FD] A5X_VPC_MODE_CNTL (0x0E62)*/
+	0x70460001, /* [0x00FE] == TYPE7: EVENT_WRITE (46) == */
+	0x0000002C, /* [0x00FF] */
+	0x40E1D001, /* [0x0100] == TYPE4 == */
+	0x00000000, /* [0x0101] A5X_RB_WINDOW_OFFSET_CTX_0 (0xE1D0)*/
+	0x70BF8003, /* [0x0102] == TYPE7: INDIRECT_BUFFER_PFE (3F) == */
+	0x00000000, /* [0x0103] */
+	0x00000000, /* [0x0104] */
+	0x000000A0, /* [0x0105] */
+	0x70460001, /* [0x0106] == TYPE7: EVENT_WRITE (46) == */
+	0x0000002D, /* [0x0107] */
+	0x70460004, /* [0x0108] == TYPE7: EVENT_WRITE (46) == */
+	0x00000004, /* [0x0109] */
+	0x00000000, /* [0x010A] */
+	0x00000000, /* [0x010B] */
+	0x00000000, /* [0x010C] */
+	0x70268000, /* [0x010D] == TYPE7: WAIT_FOR_IDLE (26) == */
+	0x480E6201, /* [0x010E] == TYPE4 == */
+	0x00000000, /* [0x010F] A5X_VPC_MODE_CNTL (0x0E62)*/
+	0x48E3B001, /* [0x0110] == TYPE4 == */
+	0x00000003, /* [0x0111] A5X_PC_POWER_CNTL_CTX_0 (0xE3B0)*/
+	0x48E4F001, /* [0x0112] == TYPE4 == */
+	0x00000003, /* [0x0113] A5X_VFD_POWER_CNTL_CTX_0 (0xE4F0)*/
+	0x70268000, /* [0x0114] == TYPE7: WAIT_FOR_IDLE (26) == */
+	0x400CC701, /* [0x0115] == TYPE4 == */
+	0x7C13C080, /* [0x0116] A5X_RB_CCU_CNTL (0x0CC7)*/
+	0x70EC8005, /* [0x0117] == TYPE7: SET_RENDER_MODE (6C) == */
+	0x00000001, /* [0x0118] */
+	0x00000000, /* [0x0119] */
+	0x00000000, /* [0x011A] */
+	0x00000010, /* [0x011B] */
+	0x00000001, /* [0x011C] */
+	0x70EA0001, /* [0x011D] == TYPE7: PREEMPT_ENABLE_LOCAL (6A) == */
+	0x00000000, /* [0x011E] */
+	0x48E0EA02, /* [0x011F] == TYPE4 == */
+	0x00000000, /* [0x0120] A5X_GRAS_SC_WINDOW_SCISSOR_TL_CTX_0 (0xE0EA)*/
+	0x001F0073, /* [0x0121] A5X_GRAS_SC_WINDOW_SCISSOR_BR_CTX_0 (0xE0EB)*/
+	0x48E21102, /* [0x0122] == TYPE4 == */
+	0x00000000, /* [0x0123] A5X_RB_RESOLVE_CNTL_1_CTX_0 (0xE211)*/
+	0x00030007, /* [0x0124] A5X_RB_RESOLVE_CNTL_2_CTX_0 (0xE212)*/
+	0x70138000, /* [0x0125] == TYPE7: WAIT_FOR_ME (13) == */
+	0x70640001, /* [0x0126] == TYPE7: SET_VISIBILITY_OVERRIDE (64) == */
+	0x00000000, /* [0x0127] */
+	0x702F8005, /* [0x0128] == TYPE7: SET_BIN_DATA (2F) == */
+	0x00010000, /* [0x0129] */
+	0x00000000, /* [0x012A] */
+	0x00000000, /* [0x012B] */
+	0x00000000, /* [0x012C] */
+	0x00000000, /* [0x012D] */
+	0x40E1D001, /* [0x012E] == TYPE4 == */
+	0x00000000, /* [0x012F] A5X_RB_WINDOW_OFFSET_CTX_0 (0xE1D0)*/
+	0x40E2A201, /* [0x0130] == TYPE4 == */
+	0x00000001, /* [0x0131] A5X_VPC_SO_OVERRIDE_CTX_0 (0xE2A2)*/
+	0x70640001, /* [0x0132] == TYPE7: SET_VISIBILITY_OVERRIDE (64) == */
+	0x00000000, /* [0x0133] */
+	0x48E1B285, /* [0x0134] == TYPE4 == */
+	0x00000001, /* [0x0135] A5X_RB_DEPTH_BUFFER_INFO_CTX_0 (0xE1B2)*/
+	0x00004000, /* [0x0136] A5X_RB_DEPTH_BUFFER_BASE_LO_CTX_0 (0xE1B3)*/
+	0x00000000, /* [0x0137] A5X_RB_DEPTH_BUFFER_BASE_HI_CTX_0 (0xE1B4)*/
+	0x00000004, /* [0x0138] A5X_RB_DEPTH_BUFFER_PITCH_CTX_0 (0xE1B5)*/
+	0x000000C0, /* [0x0139] A5X_RB_DEPTH_BUFFER_ARRAY_PITCH_CTX_0 (0xE1B6)*/
+	0x48E09801, /* [0x013A] == TYPE4 == */
+	0x00000001, /* [0x013B] A5X_GRAS_SU_DEPTH_BUFFER_INFO_CTX_0 (0xE098)*/
+	0x40E24083, /* [0x013C] == TYPE4 == */
+	0x00000000, /* [0x013D] A5X_RB_DEPTH_FLAG_BUFFER_BASE_LO_CTX_0 (0xE240)*/
+	0x00000000, /* [0x013E] A5X_RB_DEPTH_FLAG_BUFFER_BASE_HI_CTX_0 (0xE241)*/
+	0x00000000, /* [0x013F] A5X_RB_DEPTH_FLAG_BUFFER_PITCH_CTX_0 (0xE242)*/
+	0x40E15285, /* [0x0140] == TYPE4 == */
+	0x00001230, /* [0x0141] A5X_RB_MRT_BUFFER_INFO_0_CTX_0 (0xE152)*/
+	0x00000008, /* [0x0142] A5X_RB_MRT_BUFFER_PITCH_0_CTX_0 (0xE153)*/
+	0x00000100, /* [0x0143] A5X_RB_MRT_BUFFER_ARRAY_PITCH_0_CTX_0 (0xE154)*/
+	0x00000000, /* [0x0144] A5X_RB_MRT_BUFFER_BASE_LO_0_CTX_0 (0xE155)*/
+	0x00000000, /* [0x0145] A5X_RB_MRT_BUFFER_BASE_HI_0_CTX_0 (0xE156)*/
+	0x40E40801, /* [0x0146] == TYPE4 == */
+	0x00000000, /* [0x0147] A5X_VFD_INDEX_OFFSET_CTX_0 (0xE408)*/
+	0x48E40901, /* [0x0148] == TYPE4 == */
+	0x00000000, /* [0x0149] A5X_VFD_INSTANCE_START_OFFSET_CTX_0 (0xE409)*/
+	0x70BF8003, /* [0x014A] == TYPE7: INDIRECT_BUFFER_PFE (3F) == */
+	0x00000000, /* [0x014B] */
+	0x00000000, /* [0x014C] */
+	0x00000112, /* [0x014D] */
+	0x70230001, /* [0x014E] == TYPE7: SKIP_IB2_ENABLE_LOCAL (23) == */
+	0x00000000, /* [0x014F] */
+	0x70BF8003, /* [0x0150] == TYPE7: INDIRECT_BUFFER_PFE (3F) == */
+	0x00000000, /* [0x0151] */
+	0x00000000, /* [0x0152] */
+	0x0000001B, /* [0x0153] */
+	0x70EC8005, /* [0x0154] == TYPE7: SET_RENDER_MODE (6C) == */
+	0x00000001, /* [0x0155] */
+	0x00000000, /* [0x0156] */
+	0x00000000, /* [0x0157] */
+	0x00000000, /* [0x0158] */
+	0x00000001, /* [0x0159] */
+	0x70438003, /* [0x015A] == TYPE7: SET_DRAW_STATE (43) == */
+	0x00080059, /* [0x015B] */
+	0x00000000, /* [0x015C] */
+	0x00000000, /* [0x015D] */
+	0x70388003, /* [0x015E] == TYPE7: DRAW_INDX_OFFSET (38) == */
+	0x00000888, /* [0x015F] */
+	0x00000000, /* [0x0160] */
+	0x00000002, /* [0x0161] */
+	0x70A88003, /* [0x0162] == TYPE7: DRAW_INDIRECT (28) == */
+	0x00200884, /* [0x0163] */
+	0x00000000, /* [0x0164] */
+	0x00000000, /* [0x0165] */
+	0x70298006, /* [0x0166] == TYPE7: DRAW_INDX_INDIRECT (29) == */
+	0x00200404, /* [0x0167] */
+	0x00000000, /* [0x0168] */
+	0x00000000, /* [0x0169] */
+	0x00000006, /* [0x016A] */
+	0x00000000, /* [0x016B] */
+	0x00000000, /* [0x016C] */
+	0x40E2A783, /* [0x016D] == TYPE4 == */
+	0x00000000, /* [0x016E] A5X_VPC_SO_BUFFER_BASE_LO_0_CTX_0 (0xE2A7)*/
+	0x00000000, /* [0x016F] A5X_VPC_SO_BUFFER_BASE_HI_0_CTX_0 (0xE2A8)*/
+	0x00000004, /* [0x0170] A5X_VPC_SO_BUFFER_SIZE_0_CTX_0 (0xE2A9)*/
+	0x48E2AC02, /* [0x0171] == TYPE4 == */
+	0x00000000, /* [0x0172] A5X_VPC_SO_FLUSH_BASE_LO_0_CTX_0 (0xE2AC)*/
+	0x00000000, /* [0x0173] A5X_VPC_SO_FLUSH_BASE_HI_0_CTX_0 (0xE2AD)*/
+	0x70460001, /* [0x0174] == TYPE7: EVENT_WRITE (46) == */
+	0x00000011, /* [0x0175] */
+	0x48E10001, /* [0x0176] == TYPE4 == */
+	0x00000009, /* [0x0177] A5X_GRAS_LRZ_CNTL_CTX_0 (0xE100)*/
+	0x70460001, /* [0x0178] == TYPE7: EVENT_WRITE (46) == */
+	0x00000026, /* [0x0179] */
+	0x48E10001, /* [0x017A] == TYPE4 == */
+	0x00000008, /* [0x017B] A5X_GRAS_LRZ_CNTL_CTX_0 (0xE100)*/
+	0x40E10185, /* [0x017C] == TYPE4 == */
+	0x00000000, /* [0x017D] A5X_GRAS_LRZ_BUFFER_BASE_LO_CTX_0 (0xE101)*/
+	0x00000000, /* [0x017E] A5X_GRAS_LRZ_BUFFER_BASE_HI_CTX_0 (0xE102)*/
+	0x00000001, /* [0x017F] A5X_GRAS_LRZ_BUFFER_PITCH_CTX_0 (0xE103)*/
+	0x00000000, /* [0x0180] A5X_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO_CTX_0 (0xE104)*/
+	0x00000000, /* [0x0181] A5X_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI_CTX_0 (0xE105)*/
+	0x70460001, /* [0x0182] == TYPE7: EVENT_WRITE (46) == */
+	0x00000025, /* [0x0183] */
+	0x70460001, /* [0x0184] == TYPE7: EVENT_WRITE (46) == */
+	0x00000019, /* [0x0185] */
+	0x70460001, /* [0x0186] == TYPE7: EVENT_WRITE (46) == */
+	0x00000018, /* [0x0187] */
+	0x70EA0001, /* [0x0188] == TYPE7: PREEMPT_ENABLE_LOCAL (6A) == */
+	0x00000000, /* [0x0189] */
+	0x70EC0001, /* [0x018A] == TYPE7: SET_RENDER_MODE (6C) == */
+	0x00000006, /* [0x018B] */
+	0x70438003, /* [0x018C] == TYPE7: SET_DRAW_STATE (43) == */
+	0x00080059, /* [0x018D] */
+	0x00000000, /* [0x018E] */
+	0x00000000, /* [0x018F] */
+	0x70DC0002, /* [0x0190] == TYPE7: CONTEXT_REG_BUNCH (5C) == */
+	0x0000E2A1, /* [0x0191] */
+	0x00008001, /* [0x0192] */
+	0x709D0001, /* [0x0193] == TYPE7: SKIP_IB2_ENABLE_GLOBAL (1D) == */
+	0x00000000, /* [0x0194] */
+	0x70138000, /* [0x0195] == TYPE7: WAIT_FOR_ME (13) == */
+	0x70640001, /* [0x0196] == TYPE7: SET_VISIBILITY_OVERRIDE (64) == */
+	0x00000001, /* [0x0197] */
+	0x70380007, /* [0x0198] == TYPE7: DRAW_INDX_OFFSET (38) == */
+	0x00200506, /* [0x0199] */
+	0x00000000, /* [0x019A] */
+	0x00000004, /* [0x019B] */
+	0x00000000, /* [0x019C] */
+	0x00000000, /* [0x019D] */
+	0x00000000, /* [0x019E] */
+	0x00000004, /* [0x019F] */
+	0x703D8005, /* [0x01A0] == TYPE7: MEM_WRITE (3D) == */
+	0x00000000, /* [0x01A1] */
+	0x00000000, /* [0x01A2] */
+	0x00000001, /* [0x01A3] */
+	0x00000001, /* [0x01A4] */
+	0x00000001, /* [0x01A5] */
+	0x70928000, /* [0x01A6] == TYPE7: WAIT_MEM_WRITES (12) == */
+	0x70BF8003, /* [0x01A7] == TYPE7: INDIRECT_BUFFER_PFE (3F) == */
+	0x00000000, /* [0x01A8] */
+	0x00000000, /* [0x01A9] */
+	0x00000028, /* [0x01AA] */
+	0x70C48006, /* [0x01AB] == TYPE7: COND_EXEC (44) == */
+	0x00000000, /* [0x01AC] */
+	0x00000000, /* [0x01AD] */
+	0x00000000, /* [0x01AE] */
+	0x00000000, /* [0x01AF] */
+	0x00000001, /* [0x01B0] */
+	0x00000002, /* [0x01B1] */
+	0x70100001, /* [0x01B2] == TYPE7: NOP (10) == */
+	0x00000000, /* [0x01B3] */
+	0x70C28003, /* [0x01B4] == TYPE7: MEM_TO_REG (42) == */
+	0xC000E2AB, /* [0x01B5] */
+	0x00000000, /* [0x01B6] */
+	0x00000000, /* [0x01B7] */
+	0x70230001, /* [0x01B8] == TYPE7: SKIP_IB2_ENABLE_LOCAL (23) == */
+	0x00000000, /* [0x01B9] */
+	0x70E90001, /* [0x01BA] == TYPE7: PREEMPT_ENABLE_GLOBAL (69) == */
+	0x00000000, /* [0x01BB] */
+	0x70BC8006, /* [0x01BC] == TYPE7: WAIT_REG_MEM (3C) == */
+	0x00000010, /* [0x01BD] */
+	0x00000000, /* [0x01BE] */
+	0x00000000, /* [0x01BF] */
+	0x00000001, /* [0x01C0] */
+	0xFFFFFFFF, /* [0x01C1] */
+	0x00000001, /* [0x01C2] */
+	0x70738009, /* [0x01C3] == TYPE7: MEM_TO_MEM (73) == */
+	0x20000004, /* [0x01C4] */
+	0x00000000, /* [0x01C5] */
+	0x00000000, /* [0x01C6] */
+	0x00000000, /* [0x01C7] */
+	0x00000000, /* [0x01C8] */
+	0x00000000, /* [0x01C9] */
+	0x00000000, /* [0x01CA] */
+	0x00000000, /* [0x01CB] */
+	0x00000000, /* [0x01CC] */
+	0x70738009, /* [0x01CD] == TYPE7: MEM_TO_MEM (73) == */
+	0xE0000004, /* [0x01CE] */
+	0x00000000, /* [0x01CF] */
+	0x00000000, /* [0x01D0] */
+	0x00000000, /* [0x01D1] */
+	0x00000000, /* [0x01D2] */
+	0x00000000, /* [0x01D3] */
+	0x00000000, /* [0x01D4] */
+	0x00000000, /* [0x01D5] */
+	0x00000000, /* [0x01D6] */
+	0x70B50001, /* [0x01D7] == TYPE7: SET_SUBDRAW_SIZE (35) == */
+	0x00000001, /* [0x01D8] */
+	0x40E78A01, /* [0x01D9] == TYPE4 == */
+	0x000FFFFF, /* [0x01DA] A5X_HLSQ_UPDATE_CNTL_CTX_0 (0xE78A)*/
+	0x70268000, /* [0x01DB] == TYPE7: WAIT_FOR_IDLE (26) == */
+	0x400E0601, /* [0x01DC] == TYPE4 == */
+	0x00000001, /* [0x01DD] A5X_HLSQ_MODE_CNTL (0x0E06)*/
+	0x706E0004, /* [0x01DE] == TYPE7: COMPUTE_CHECKPOINT (6E) == */
+	0x00000000, /* [0x01DF] */
+	0x00000000, /* [0x01E0] */
+	0x00000018, /* [0x01E1] */
+	0x00000001, /* [0x01E2] */
+	0x40E14001, /* [0x01E3] == TYPE4 == */
+	0x00020000, /* [0x01E4] A5X_RB_CNTL_CTX_0 (0xE140)*/
+	0x40E78A01, /* [0x01E5] == TYPE4 == */
+	0x01F00000, /* [0x01E6] A5X_HLSQ_UPDATE_CNTL_CTX_0 (0xE78A)*/
+	0x70268000, /* [0x01E7] == TYPE7: WAIT_FOR_IDLE (26) == */
+	0x48E38C01, /* [0x01E8] == TYPE4 == */
+	0xFFFFFFFF, /* [0x01E9] A5X_PC_RESTART_INDEX_CTX_0 (0xE38C)*/
+	0x480D0201, /* [0x01EA] == TYPE4 == */
+	0x0000001F, /* [0x01EB] A5X_PC_MODE_CNTL (0x0D02)*/
+	0x480EC201, /* [0x01EC] == TYPE4 == */
+	0x0000001E, /* [0x01ED] A5X_SP_MODE_CNTL (0x0EC2)*/
+	0x48E58001, /* [0x01EE] == TYPE4 == */
+	0x00000000, /* [0x01EF] A5X_SP_SP_CNTL_CTX_0 (0xE580)*/
+	0x40E2A201, /* [0x01F0] == TYPE4 == */
+	0x00000001, /* [0x01F1] A5X_VPC_SO_OVERRIDE_CTX_0 (0xE2A2)*/
+	0x70640001, /* [0x01F2] == TYPE7: SET_VISIBILITY_OVERRIDE (64) == */
+	0x00000001, /* [0x01F3] */
+	0x48E78401, /* [0x01F4] == TYPE4 == */
+	0x00000881, /* [0x01F5] A5X_HLSQ_CNTL_0_CTX_0 (0xE784)*/
+	0x40E5F001, /* [0x01F6] == TYPE4 == */
+	0x00000C06, /* [0x01F7] A5X_SP_CS_CNTL_0_CTX_0 (0xE5F0)*/
+	0x48E79001, /* [0x01F8] == TYPE4 == */
+	0x00000001, /* [0x01F9] A5X_HLSQ_CS_CONFIG_CTX_0 (0xE790)*/
+	0x48E79601, /* [0x01FA] == TYPE4 == */
+	0x00000005, /* [0x01FB] A5X_HLSQ_CS_CNTL_CTX_0 (0xE796)*/
+	0x48E58901, /* [0x01FC] == TYPE4 == */
+	0x00000001, /* [0x01FD] A5X_SP_CS_CONFIG_CTX_0 (0xE589)*/
+	0x40E7DC01, /* [0x01FE] == TYPE4 == */
+	0x00000030, /* [0x01FF] A5X_HLSQ_CONTEXT_SWITCH_CS_SW_3_CTX_0 (0xE7DC)*/
+	0x48E7DD01, /* [0x0200] == TYPE4 == */
+	0x00000002, /* [0x0201] A5X_HLSQ_CONTEXT_SWITCH_CS_SW_4_CTX_0 (0xE7DD)*/
+	0x40E7B001, /* [0x0202] == TYPE4 == */
+	0x00000003, /* [0x0203] A5X_HLSQ_CS_NDRANGE_0_CTX_0 (0xE7B0)*/
+	0x48E7B702, /* [0x0204] == TYPE4 == */
+	0x00FCC0CF, /* [0x0205] A5X_HLSQ_CS_CNTL_0_CTX_0 (0xE7B7)*/
+	0x00000000, /* [0x0206] A5X_HLSQ_CS_CNTL_1_CTX_0 (0xE7B8)*/
+	0x40E7B983, /* [0x0207] == TYPE4 == */
+	0x00000001, /* [0x0208] A5X_HLSQ_CS_KERNEL_GROUP_X_CTX_0 (0xE7B9)*/
+	0x00000001, /* [0x0209] A5X_HLSQ_CS_KERNEL_GROUP_Y_CTX_0 (0xE7BA)*/
+	0x00000001, /* [0x020A] A5X_HLSQ_CS_KERNEL_GROUP_Z_CTX_0 (0xE7BB)*/
+	0x70B08003, /* [0x020B] == TYPE7: LOAD_STATE (30) == */
+	0x00B60000, /* [0x020C] */
+	0x00000000, /* [0x020D] */
+	0x00000000, /* [0x020E] */
+	0x70B08003, /* [0x020F] == TYPE7: LOAD_STATE (30) == */
+	0x01360008, /* [0x0210] */
+	0x00000000, /* [0x0211] */
+	0x00000000, /* [0x0212] */
+	0x70B0000B, /* [0x0213] == TYPE7: LOAD_STATE (30) == */
+	0x00BC0000, /* [0x0214] */
+	0x00000000, /* [0x0215] */
+	0x00000000, /* [0x0216] */
+	0x00000000, /* [0x0217] */
+	0x00000000, /* [0x0218] */
+	0x00000000, /* [0x0219] */
+	0x00000000, /* [0x021A] */
+	0x00000000, /* [0x021B] */
+	0x00000000, /* [0x021C] */
+	0x00000000, /* [0x021D] */
+	0x00000000, /* [0x021E] */
+	0x70B00007, /* [0x021F] == TYPE7: LOAD_STATE (30) == */
+	0x00BC0000, /* [0x0220] */
+	0x00000001, /* [0x0221] */
+	0x00000000, /* [0x0222] */
+	0x00040000, /* [0x0223] */
+	0x00000000, /* [0x0224] */
+	0x00040000, /* [0x0225] */
+	0x00000000, /* [0x0226] */
+	0x70B00007, /* [0x0227] == TYPE7: LOAD_STATE (30) == */
+	0x00BC0000, /* [0x0228] */
+	0x00000002, /* [0x0229] */
+	0x00000000, /* [0x022A] */
+	0x00000000, /* [0x022B] */
+	0x00000000, /* [0x022C] */
+	0x00000000, /* [0x022D] */
+	0x00000000, /* [0x022E] */
+	0x48E7B186, /* [0x022F] == TYPE4 == */
+	0x00000001, /* [0x0230] A5X_HLSQ_CS_NDRANGE_1_CTX_0 (0xE7B1)*/
+	0x00000000, /* [0x0231] A5X_HLSQ_CS_NDRANGE_2_CTX_0 (0xE7B2)*/
+	0x00000001, /* [0x0232] A5X_HLSQ_CS_NDRANGE_3_CTX_0 (0xE7B3)*/
+	0x00000000, /* [0x0233] A5X_HLSQ_CS_NDRANGE_4_CTX_0 (0xE7B4)*/
+	0x00000001, /* [0x0234] A5X_HLSQ_CS_NDRANGE_5_CTX_0 (0xE7B5)*/
+	0x00000000, /* [0x0235] A5X_HLSQ_CS_NDRANGE_6_CTX_0 (0xE7B6)*/
+	0x70B30004, /* [0x0236] == TYPE7: EXEC_CS (33) == */
+	0x00000000, /* [0x0237] */
+	0x00000001, /* [0x0238] */
+	0x00000001, /* [0x0239] */
+	0x00000001, /* [0x023A] */
+	0x480E6201, /* [0x023B] == TYPE4 == */
+	0x00000001, /* [0x023C] A5X_VPC_MODE_CNTL (0x0E62)*/
+};
+
+/*
+ * These are address fixups for _a5xx_critical_pkts[].  The first two numbers
+ * are the dword offsets of the LO and HI address halves in the buffer above,
+ * the third identifies which critical packet buffer is being patched in, and
+ * the final number is an offset into that buffer.  An illustrative sketch of
+ * applying one of these fixups follows the table below.
+ */
+static const struct adreno_critical_fixup critical_pkt_fixups[] = {
+	{ 132, 133, CRITICAL_PACKET2, 0x0000 },
+	{ 136, 137, CRITICAL_PACKET2, 0x0001 },
+	{ 154, 155, CRITICAL_PACKET2, 0x0100 },
+	{ 159, 160, CRITICAL_PACKET2, 0x0104 },
+	{ 173, 174, CRITICAL_PACKET2, 0x0200 },
+	{ 177, 178, CRITICAL_PACKET2, 0x0300 },
+	{ 236, 237, CRITICAL_PACKET0, 0x0000 },
+	{ 244, 245, CRITICAL_PACKET0, 0x0040 },
+	{ 259, 260, CRITICAL_PACKET3, 0x0000 },
+	{ 266, 267, CRITICAL_PACKET2, 0x0108 },
+	{ 298, 299, CRITICAL_PACKET0, 0x0040 },
+	{ 300, 301, CRITICAL_PACKET2, 0x0080 },
+	{ 331, 332, CRITICAL_PACKET3, 0x02A0 },
+	{ 337, 338, CRITICAL_PACKET3, 0x0700 },
+	{ 348, 349, CRITICAL_PACKET3, 0x0920 },
+	{ 356, 357, CRITICAL_PACKET1, 0x008C },
+	{ 360, 361, CRITICAL_PACKET1, 0x0080 },
+	{ 363, 364, CRITICAL_PACKET1, 0x008C },
+	{ 366, 367, CRITICAL_PACKET0, 0x0100 },
+	{ 370, 371, CRITICAL_PACKET0, 0x0120 },
+	{ 381, 382, CRITICAL_PACKET1, 0x0480 },
+	{ 384, 385, CRITICAL_PACKET1, 0x0400 },
+	{ 398, 399, CRITICAL_PACKET3, 0x0920 },
+	{ 413, 414, CRITICAL_PACKET1, 0x0080 },
+	{ 417, 418, CRITICAL_PACKET1, 0x0300 },
+	{ 424, 425, CRITICAL_PACKET3, 0x0880 },
+	{ 428, 429, CRITICAL_PACKET1, 0x0300 },
+	{ 430, 431, CRITICAL_PACKET1, 0x0300 },
+	{ 438, 439, CRITICAL_PACKET1, 0x0300 },
+	{ 446, 447, CRITICAL_PACKET1, 0x0300 },
+	{ 453, 454, CRITICAL_PACKET1, 0x0320 },
+	{ 455, 456, CRITICAL_PACKET1, 0x0300 },
+	{ 457, 458, CRITICAL_PACKET1, 0x0304 },
+	{ 459, 460, CRITICAL_PACKET1, 0x0308 },
+	{ 463, 464, CRITICAL_PACKET1, 0x0320 },
+	{ 465, 466, CRITICAL_PACKET1, 0x0300 },
+	{ 467, 468, CRITICAL_PACKET1, 0x0304 },
+	{ 469, 470, CRITICAL_PACKET1, 0x0308 },
+	{ 525, 526, CRITICAL_PACKET1, 0x0160 },
+	{ 529, 530, CRITICAL_PACKET1, 0x0101 },
+	{ 535, 536, CRITICAL_PACKET1, 0x0140 },
+	{ 539, 540, CRITICAL_PACKET0, 0x0800 },
+	{ 555, 556, CRITICAL_PACKET1, 0x0140 },
+	{ 557, 558, CRITICAL_PACKET0, 0x0800 },
+};
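+
+/*
+ * Editor's illustrative sketch, not part of the msm-4.4 snapshot: applying
+ * one of the fixups above means writing the two halves of a patched GPU
+ * address into the dwords named by lo_offset/hi_offset.  The 'bases' array
+ * (one GPU base address per CRITICAL_PACKET* buffer) is a hypothetical
+ * parameter; the driver derives the real addresses from its allocations.
+ */
+static inline void a5xx_example_apply_fixup(unsigned int *cmds,
+		const struct adreno_critical_fixup *fixup,
+		const uint64_t bases[CRITICAL_PACKET_MAX])
+{
+	uint64_t gpuaddr = bases[fixup->buffer] + fixup->mem_offset;
+
+	cmds[fixup->lo_offset] = (unsigned int)(gpuaddr & 0xffffffff);
+	cmds[fixup->hi_offset] = (unsigned int)(gpuaddr >> 32);
+}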
+
+static unsigned int _a5xx_critical_pkts_mem01[] = {
+	0xBECCCCCD, 0x00000000, 0x3ECCCCCD, 0x00000000, 0x3ECCCCCD, 0x3ECCCCCD,
+	0xBECCCCCD, 0x00000000, 0xBECCCCCD, 0x3ECCCCCD, 0x3ECCCCCD, 0x00000000,
+	0xBECCCCCD, 0x00000000, 0x3ECCCCCD, 0x00000000, 0x3ECCCCCD, 0xBECCCCCD,
+	0xBECCCCCD, 0x00000000, 0xBECCCCCD, 0xBECCCCCD, 0x3ECCCCCD, 0x00000000,
+	0x3ECCCCCD, 0x00000000, 0x3ECCCCCD, 0x00000000, 0x3ECCCCCD, 0x00000000,
+	0x00000000, 0x00000000, 0x00040003, 0x00090005, 0x000B000A, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000003, 0x00000001,
+	0x00000006, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000003, 0x00000001, 0x00000003, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000001, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x3EF5C28F, 0x3ED1EB85, 0x3E6147AE, 0x3F800000,
+	0x3E947AE1, 0x3E6147AE, 0x3D4CCCCD, 0x3F800000, 0x00000000, 0x20554002,
+	0x3F800000, 0x20444003, 0x000000CF, 0x20044904, 0x00000000, 0x00000200,
+	0x00050001, 0x42300001, 0x00000002, 0x20154005, 0x00000020, 0x20244006,
+	0x00000000, 0x00000000, 0x10200001, 0x46500007, 0x20030004, 0x46D00004,
+	0x00000000, 0x20554008, 0x00070001, 0x61830806, 0x00061020, 0x61808001,
+	0x00040000, 0x42380800, 0x00010000, 0x42380800, 0x20040000, 0x46D80800,
+	0x00000000, 0x20154007, 0x20020000, 0x46F80000, 0x00000007, 0x20154001,
+	0x00000000, 0x00000200, 0x60030001, 0x43900004, 0x60030001, 0x43900001,
+	0x00000000, 0x00000400, 0x00013600, 0xC6E20004, 0x40040003, 0x50180104,
+	0x40060003, 0x40180803, 0x00000003, 0x20044006, 0x00000000, 0x00000500,
+	0x00003609, 0xC7260201, 0x00000000, 0x03000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000,
+};
+
+static unsigned int _a5xx_critical_pkts_mem02[] = {
+	0x00000000, 0x03000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x0000000C, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x8ACFE7F3, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
+static unsigned int _a5xx_critical_pkts_mem03[] = {
+	0x70438003, /* [0x0000] == TYPE7: SET_DRAW_STATE (43) == */
+	0x0008003A, /* [0x0001] */
+	0x00000000, /* [0x0002] */
+	0x00000000, /* [0x0003] */
+	0x70B08003, /* [0x0004] == TYPE7: LOAD_STATE (30) == */
+	0x00620000, /* [0x0005] */
+	0x00000000, /* [0x0006] */
+	0x00000000, /* [0x0007] */
+	0x40E29801, /* [0x0008] == TYPE4 == */
+	0x0000FFFF, /* [0x0009] A5X_VPC_GS_SIV_CNTL_CTX_0 (0xE298)*/
+	0x48E2A001, /* [0x000A] == TYPE4 == */
+	0x000000FF, /* [0x000B] A5X_VPC_PS_PRIMITIVEID_CNTL_CTX_0 (0xE2A0)*/
+	0x40E40185, /* [0x000C] == TYPE4 == */
+	0x00FCFCFC, /* [0x000D] A5X_VFD_CNTL_1_CTX_0 (0xE401)*/
+	0x0000FCFC, /* [0x000E] A5X_VFD_CNTL_2_CTX_0 (0xE402)*/
+	0x0000FCFC, /* [0x000F] A5X_VFD_CNTL_3_CTX_0 (0xE403)*/
+	0x000000FC, /* [0x0010] A5X_VFD_CNTL_4_CTX_0 (0xE404)*/
+	0x00000000, /* [0x0011] A5X_VFD_CNTL_5_CTX_0 (0xE405)*/
+	0x48E38F01, /* [0x0012] == TYPE4 == */
+	0x00000000, /* [0x0013] A5X_PC_HS_PARAM_CTX_0 (0xE38F)*/
+	0x48E58001, /* [0x0014] == TYPE4 == */
+	0x00000010, /* [0x0015] A5X_SP_SP_CNTL_CTX_0 (0xE580)*/
+	0x40E00001, /* [0x0016] == TYPE4 == */
+	0x00000080, /* [0x0017] A5X_GRAS_CL_CNTL_CTX_0 (0xE000)*/
+	0x40E09583, /* [0x0018] == TYPE4 == */
+	0x00000000, /* [0x0019] A5X_GRAS_SU_POLY_OFFSET_SCALE_CTX_0 (0xE095)*/
+	0x00000000, /* [0x001A] A5X_GRAS_SU_POLY_OFFSET_OFFSET_CTX_0 (0xE096)*/
+	0x00000000, /* [0x001B] A5X_GRAS_SU_POLY_OFFSET_OFFSET_CLAMP_CTX_0 (0xE097)*/
+	0x40E09001, /* [0x001C] == TYPE4 == */
+	0x00000010, /* [0x001D] A5X_GRAS_SU_CNTL_CTX_0 (0xE090)*/
+	0x40E0AA02, /* [0x001E] == TYPE4 == */
+	0x00000000, /* [0x001F] A5X_GRAS_SC_SCREEN_SCISSOR_TL_0_CTX_0 (0xE0AA)*/
+	0x001F0073, /* [0x0020] A5X_GRAS_SC_SCREEN_SCISSOR_BR_0_CTX_0 (0xE0AB)*/
+	0x48E01086, /* [0x0021] == TYPE4 == */
+	0x42680000, /* [0x0022] A5X_GRAS_CL_VIEWPORT_XOFFSET_0_CTX_0 (0xE010)*/
+	0x42680000, /* [0x0023] A5X_GRAS_CL_VIEWPORT_XSCALE_0_CTX_0 (0xE011)*/
+	0x41800000, /* [0x0024] A5X_GRAS_CL_VIEWPORT_YOFFSET_0_CTX_0 (0xE012)*/
+	0xC1800000, /* [0x0025] A5X_GRAS_CL_VIEWPORT_YSCALE_0_CTX_0 (0xE013)*/
+	0x3EFFFEE0, /* [0x0026] A5X_GRAS_CL_VIEWPORT_ZOFFSET_0_CTX_0 (0xE014)*/
+	0x3EFFFEE0, /* [0x0027] A5X_GRAS_CL_VIEWPORT_ZSCALE_0_CTX_0 (0xE015)*/
+	0x40E0CA02, /* [0x0028] == TYPE4 == */
+	0x00000000, /* [0x0029] A5X_GRAS_SC_VIEWPORT_SCISSOR_TL_0_CTX_0 (0xE0CA)*/
+	0x001F0073, /* [0x002A] A5X_GRAS_SC_VIEWPORT_SCISSOR_BR_0_CTX_0 (0xE0CB)*/
+	0x40E00601, /* [0x002B] == TYPE4 == */
+	0x0007FDFF, /* [0x002C] A5X_GRAS_CL_GUARDBAND_CLIP_ADJ_CTX_0 (0xE006)*/
+	0x40E70401, /* [0x002D] == TYPE4 == */
+	0x00000000, /* [0x002E] A5X_TPL1_TP_RAS_MSAA_CNTL_CTX_0 (0xE704)*/
+	0x48E70501, /* [0x002F] == TYPE4 == */
+	0x00000004, /* [0x0030] A5X_TPL1_TP_DEST_MSAA_CNTL_CTX_0 (0xE705)*/
+	0x48E14201, /* [0x0031] == TYPE4 == */
+	0x00000000, /* [0x0032] A5X_RB_RAS_MSAA_CNTL_CTX_0 (0xE142)*/
+	0x40E14301, /* [0x0033] == TYPE4 == */
+	0x00000004, /* [0x0034] A5X_RB_DEST_MSAA_CNTL_CTX_0 (0xE143)*/
+	0x40E78683, /* [0x0035] == TYPE4 == */
+	0xFCFCFCFC, /* [0x0036] A5X_HLSQ_CNTL_2_CTX_0 (0xE786)*/
+	0xFCFCFCFC, /* [0x0037] A5X_HLSQ_CNTL_3_CTX_0 (0xE787)*/
+	0xFCFCFCFC, /* [0x0038] A5X_HLSQ_CNTL_4_CTX_0 (0xE788)*/
+	0x48E0A201, /* [0x0039] == TYPE4 == */
+	0x00000000, /* [0x003A] A5X_GRAS_SC_RAS_MSAA_CNTL_CTX_0 (0xE0A2)*/
+	0x40E0A301, /* [0x003B] == TYPE4 == */
+	0x00000004, /* [0x003C] A5X_GRAS_SC_DEST_MSAA_CNTL_CTX_0 (0xE0A3)*/
+	0x48E14101, /* [0x003D] == TYPE4 == */
+	0x0000C089, /* [0x003E] A5X_RB_RENDER_CNTL_CTX_0 (0xE141)*/
+	0x40E0A001, /* [0x003F] == TYPE4 == */
+	0x00000009, /* [0x0040] A5X_GRAS_SC_CNTL_CTX_0 (0xE0A0)*/
+	0x40E28001, /* [0x0041] == TYPE4 == */
+	0x00010004, /* [0x0042] A5X_VPC_CNTL_0_CTX_0 (0xE280)*/
+	0x40E38401, /* [0x0043] == TYPE4 == */
+	0x00000404, /* [0x0044] A5X_PC_PRIMITIVE_CNTL_CTX_0 (0xE384)*/
+	0x40E78501, /* [0x0045] == TYPE4 == */
+	0x0000003F, /* [0x0046] A5X_HLSQ_CNTL_1_CTX_0 (0xE785)*/
+	0x48E5D301, /* [0x0047] == TYPE4 == */
+	0x00000030, /* [0x0048] A5X_SP_PS_MRT_0_CTX_0 (0xE5D3)*/
+	0x48E5CB01, /* [0x0049] == TYPE4 == */
+	0x00000100, /* [0x004A] A5X_SP_PS_OUTPUT_0_CTX_0 (0xE5CB)*/
+	0x40E5CA01, /* [0x004B] == TYPE4 == */
+	0x001F9F81, /* [0x004C] A5X_SP_PS_OUTPUT_CNTL_CTX_0 (0xE5CA)*/
+	0x40E14601, /* [0x004D] == TYPE4 == */
+	0x00000001, /* [0x004E] A5X_RB_PS_OUTPUT_CNTL_CTX_0 (0xE146)*/
+	0x40E38E01, /* [0x004F] == TYPE4 == */
+	0x00000000, /* [0x0050] A5X_PC_GS_PARAM_CTX_0 (0xE38E)*/
+	0x40E28A01, /* [0x0051] == TYPE4 == */
+	0x00000000, /* [0x0052] A5X_VPC_VARYING_REPLACE_MODE_0_CTX_0 (0xE28A)*/
+	0x48E1A901, /* [0x0053] == TYPE4 == */
+	0xFFFF0100, /* [0x0054] A5X_RB_BLEND_CNTL_CTX_0 (0xE1A9)*/
+	0x40E5C901, /* [0x0055] == TYPE4 == */
+	0x00000100, /* [0x0056] A5X_SP_BLEND_CNTL_CTX_0 (0xE5C9)*/
+	0x40E76401, /* [0x0057] == TYPE4 == */
+	0x00000000, /* [0x0058] A5X_TPL1_TP_PS_ROTATION_CNTL_CTX_0 (0xE764)*/
+	0x48E09401, /* [0x0059] == TYPE4 == */
+	0x00000000, /* [0x005A] A5X_GRAS_SU_DEPTH_PLANE_CNTL_CTX_0 (0xE094)*/
+	0x40E1B001, /* [0x005B] == TYPE4 == */
+	0x00000000, /* [0x005C] A5X_RB_DEPTH_PLANE_CNTL_CTX_0 (0xE1B0)*/
+	0x48E1B101, /* [0x005D] == TYPE4 == */
+	0x00000000, /* [0x005E] A5X_RB_DEPTH_CNTL_CTX_0 (0xE1B1)*/
+	0x48E40001, /* [0x005F] == TYPE4 == */
+	0x00000001, /* [0x0060] A5X_VFD_CNTL_0_CTX_0 (0xE400)*/
+	0x48E40A04, /* [0x0061] == TYPE4 == */
+	0x00000000, /* [0x0062] A5X_VFD_VERTEX_BUFFER_BASE_LO_0_CTX_0 (0xE40A)*/
+	0x00000000, /* [0x0063] A5X_VFD_VERTEX_BUFFER_BASE_HI_0_CTX_0 (0xE40B)*/
+	0x00000078, /* [0x0064] A5X_VFD_VERTEX_BUFFER_SIZE_0_CTX_0 (0xE40C)*/
+	0x00000008, /* [0x0065] A5X_VFD_VERTEX_BUFFER_STRIDE_0_CTX_0 (0xE40D)*/
+	0x40E48A02, /* [0x0066] == TYPE4 == */
+	0xC6700000, /* [0x0067] A5X_VFD_FETCH_INSTR_0_CTX_0 (0xE48A)*/
+	0x00000001, /* [0x0068] A5X_VFD_FETCH_INSTR_STEP_RATE_0_CTX_0 (0xE48B)*/
+	0x48E4CA01, /* [0x0069] == TYPE4 == */
+	0x0000000F, /* [0x006A] A5X_VFD_DEST_CNTL_0_CTX_0 (0xE4CA)*/
+	0x48E10001, /* [0x006B] == TYPE4 == */
+	0x00000008, /* [0x006C] A5X_GRAS_LRZ_CNTL_CTX_0 (0xE100)*/
+	0x48E0A101, /* [0x006D] == TYPE4 == */
+	0x00000004, /* [0x006E] A5X_GRAS_SC_BIN_CNTL_CTX_0 (0xE0A1)*/
+	0x40E10185, /* [0x006F] == TYPE4 == */
+	0x00000000, /* [0x0070] A5X_GRAS_LRZ_BUFFER_BASE_LO_CTX_0 (0xE101)*/
+	0x00000000, /* [0x0071] A5X_GRAS_LRZ_BUFFER_BASE_HI_CTX_0 (0xE102)*/
+	0x00000001, /* [0x0072] A5X_GRAS_LRZ_BUFFER_PITCH_CTX_0 (0xE103)*/
+	0x00000000, /* [0x0073] A5X_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_LO_CTX_0 (0xE104)*/
+	0x00000000, /* [0x0074] A5X_GRAS_LRZ_FAST_CLEAR_BUFFER_BASE_HI_CTX_0 (0xE105)*/
+	0x70388003, /* [0x0075] == TYPE7: DRAW_INDX_OFFSET (38) == */
+	0x00200884, /* [0x0076] */
+	0x00000001, /* [0x0077] */
+	0x00000003, /* [0x0078] */
+	0x70380007, /* [0x0079] == TYPE7: DRAW_INDX_OFFSET (38) == */
+	0x00200404, /* [0x007A] */
+	0x00000001, /* [0x007B] */
+	0x00000003, /* [0x007C] */
+	0x00000000, /* [0x007D] */
+	0x00000000, /* [0x007E] */
+	0x00000000, /* [0x007F] */
+	0x00000006, /* [0x0080] */
+	0x70460004, /* [0x0081] == TYPE7: EVENT_WRITE (46) == */
+	0x00000004, /* [0x0082] */
+	0x00000000, /* [0x0083] */
+	0x00000000, /* [0x0084] */
+	0x00000001, /* [0x0085] */
+	0x70268000, /* [0x0086] == TYPE7: WAIT_FOR_IDLE (26) == */
+	0x70A88003, /* [0x0087] == TYPE7: DRAW_INDIRECT (28) == */
+	0x00200884, /* [0x0088] */
+	0x00000000, /* [0x0089] */
+	0x00000000, /* [0x008A] */
+	0x70460004, /* [0x008B] == TYPE7: EVENT_WRITE (46) == */
+	0x00000004, /* [0x008C] */
+	0x00000000, /* [0x008D] */
+	0x00000000, /* [0x008E] */
+	0x00000001, /* [0x008F] */
+	0x70268000, /* [0x0090] == TYPE7: WAIT_FOR_IDLE (26) == */
+	0x70298006, /* [0x0091] == TYPE7: DRAW_INDX_INDIRECT (29) == */
+	0x00200404, /* [0x0092] */
+	0x00000000, /* [0x0093] */
+	0x00000000, /* [0x0094] */
+	0x00000006, /* [0x0095] */
+	0x00000000, /* [0x0096] */
+	0x00000000, /* [0x0097] */
+	0x40E40801, /* [0x0098] == TYPE4 == */
+	0x0000000D, /* [0x0099] A5X_VFD_INDEX_OFFSET_CTX_0 (0xE408)*/
+	0x48E40901, /* [0x009A] == TYPE4 == */
+	0x00000000, /* [0x009B] A5X_VFD_INSTANCE_START_OFFSET_CTX_0 (0xE409)*/
+	0x70388003, /* [0x009C] == TYPE7: DRAW_INDX_OFFSET (38) == */
+	0x00200884, /* [0x009D] */
+	0x00000001, /* [0x009E] */
+	0x00000003, /* [0x009F] */
+	0x00000000, /* [0x00A0] */
+	0x00000000, /* [0x00A1] */
+	0x00000000, /* [0x00A2] */
+	0x00000000, /* [0x00A3] */
+	0x00000000, /* [0x00A4] */
+	0x00000000, /* [0x00A5] */
+	0x00000000, /* [0x00A6] */
+	0x00000000, /* [0x00A7] */
+	0x48E78401, /* [0x00A8] */
+	0x00000881, /* [0x00A9] */
+	0x40E5C001, /* [0x00AA] */
+	0x0004001E, /* [0x00AB] */
+	0x70438003, /* [0x00AC] */
+	0x0000003A, /* [0x00AD] */
+	0x00000000, /* [0x00AE] */
+	0x00000000, /* [0x00AF] */
+	0x70B00023, /* [0x00B0] */
+	0x00600000, /* [0x00B1] */
+	0x00000000, /* [0x00B2] */
+	0x00000000, /* [0x00B3] */
+	0x00000000, /* [0x00B4] */
+	0x03000000, /* [0x00B5] */
+	0x00000000, /* [0x00B6] */
+	0x00000000, /* [0x00B7] */
+	0x00000000, /* [0x00B8] */
+	0x00000000, /* [0x00B9] */
+	0x00000000, /* [0x00BA] */
+	0x00000000, /* [0x00BB] */
+	0x00000000, /* [0x00BC] */
+	0x00000000, /* [0x00BD] */
+	0x00000000, /* [0x00BE] */
+	0x00000000, /* [0x00BF] */
+	0x00000000, /* [0x00C0] */
+	0x00000000, /* [0x00C1] */
+	0x00000000, /* [0x00C2] */
+	0x00000000, /* [0x00C3] */
+	0x00000000, /* [0x00C4] */
+	0x00000000, /* [0x00C5] */
+	0x00000000, /* [0x00C6] */
+	0x00000000, /* [0x00C7] */
+	0x00000000, /* [0x00C8] */
+	0x00000000, /* [0x00C9] */
+	0x00000000, /* [0x00CA] */
+	0x00000000, /* [0x00CB] */
+	0x00000000, /* [0x00CC] */
+	0x00000000, /* [0x00CD] */
+	0x00000000, /* [0x00CE] */
+	0x00000000, /* [0x00CF] */
+	0x00000000, /* [0x00D0] */
+	0x00000000, /* [0x00D1] */
+	0x00000000, /* [0x00D2] */
+	0x00000000, /* [0x00D3] */
+	0x40E09301, /* [0x00D4] */
+	0x00000000, /* [0x00D5] */
+	0x40E38D01, /* [0x00D6] */
+	0x00000000, /* [0x00D7] */
+	0x40E29801, /* [0x00D8] */
+	0x0000FFFF, /* [0x00D9] */
+	0x48E28201, /* [0x00DA] */
+	0xEAEAEAEA, /* [0x00DB] */
+	0x40E29404, /* [0x00DC] */
+	0xFFFFFFFF, /* [0x00DD] */
+	0xFFFFFFFF, /* [0x00DE] */
+	0xFFFFFFFF, /* [0x00DF] */
+	0xFFFFFFFF, /* [0x00E0] */
+	0x40E5DB01, /* [0x00E1] */
+	0x00000000, /* [0x00E2] */
+	0x48E14701, /* [0x00E3] */
+	0x0000000F, /* [0x00E4] */
+	0x70B00023, /* [0x00E5] */
+	0x00700000, /* [0x00E6] */
+	0x00000000, /* [0x00E7] */
+	0x00000000, /* [0x00E8] */
+	0x00003C00, /* [0x00E9] */
+	0x20400000, /* [0x00EA] */
+	0x00000000, /* [0x00EB] */
+	0x20400001, /* [0x00EC] */
+	0x00000000, /* [0x00ED] */
+	0x20400002, /* [0x00EE] */
+	0x00003C00, /* [0x00EF] */
+	0x20400003, /* [0x00F0] */
+	0x00000000, /* [0x00F1] */
+	0x03000000, /* [0x00F2] */
+	0x00000000, /* [0x00F3] */
+	0x00000000, /* [0x00F4] */
+	0x00000000, /* [0x00F5] */
+	0x00000000, /* [0x00F6] */
+	0x00000000, /* [0x00F7] */
+	0x00000000, /* [0x00F8] */
+	0x00000000, /* [0x00F9] */
+	0x00000000, /* [0x00FA] */
+	0x00000000, /* [0x00FB] */
+	0x00000000, /* [0x00FC] */
+	0x00000000, /* [0x00FD] */
+	0x00000000, /* [0x00FE] */
+	0x00000000, /* [0x00FF] */
+	0x00000000, /* [0x0100] */
+	0x00000000, /* [0x0101] */
+	0x00000000, /* [0x0102] */
+	0x00000000, /* [0x0103] */
+	0x00000000, /* [0x0104] */
+	0x00000000, /* [0x0105] */
+	0x00000000, /* [0x0106] */
+	0x00000000, /* [0x0107] */
+	0x00000000, /* [0x0108] */
+	0x48E2A001, /* [0x0109] */
+	0x000000FF, /* [0x010A] */
+	0x40E40185, /* [0x010B] */
+	0x00FCFCFC, /* [0x010C] */
+	0x0000FCFC, /* [0x010D] */
+	0x0000FCFC, /* [0x010E] */
+	0x000000FC, /* [0x010F] */
+	0x00000000, /* [0x0110] */
+	0x48E38F01, /* [0x0111] */
+	0x00000000, /* [0x0112] */
+	0x48E58001, /* [0x0113] */
+	0x00000010, /* [0x0114] */
+	0x40E1A801, /* [0x0115] */
+	0x00000E00, /* [0x0116] */
+	0x48E15001, /* [0x0117] */
+	0x000007E0, /* [0x0118] */
+	0x40E15101, /* [0x0119] */
+	0x00000000, /* [0x011A] */
+	0x40E00001, /* [0x011B] */
+	0x00000080, /* [0x011C] */
+	0x40E09583, /* [0x011D] */
+	0x00000000, /* [0x011E] */
+	0x00000000, /* [0x011F] */
+	0x00000000, /* [0x0120] */
+	0x40E09001, /* [0x0121] */
+	0x00000010, /* [0x0122] */
+	0x40E0AA02, /* [0x0123] */
+	0x00000000, /* [0x0124] */
+	0x001F0073, /* [0x0125] */
+	0x48E01086, /* [0x0126] */
+	0x42680000, /* [0x0127] */
+	0x42680000, /* [0x0128] */
+	0x41800000, /* [0x0129] */
+	0xC1800000, /* [0x012A] */
+	0x3EFFFEE0, /* [0x012B] */
+	0x3EFFFEE0, /* [0x012C] */
+	0x40E0CA02, /* [0x012D] */
+	0x00000000, /* [0x012E] */
+	0x001F0073, /* [0x012F] */
+	0x40E00601, /* [0x0130] */
+	0x0007FDFF, /* [0x0131] */
+	0x40E70401, /* [0x0132] */
+	0x00000000, /* [0x0133] */
+	0x48E70501, /* [0x0134] */
+	0x00000004, /* [0x0135] */
+	0x48E14201, /* [0x0136] */
+	0x00000000, /* [0x0137] */
+	0x40E14301, /* [0x0138] */
+	0x00000004, /* [0x0139] */
+	0x40E78683, /* [0x013A] */
+	0xFCFCFCFC, /* [0x013B] */
+	0xFCFCFCFC, /* [0x013C] */
+	0xFCFCFCFC, /* [0x013D] */
+	0x48E0A201, /* [0x013E] */
+	0x00000000, /* [0x013F] */
+	0x40E0A301, /* [0x0140] */
+	0x00000004, /* [0x0141] */
+	0x48E1B285, /* [0x0142] */
+	0x00000001, /* [0x0143] */
+	0x00004000, /* [0x0144] */
+	0x00000000, /* [0x0145] */
+	0x00000004, /* [0x0146] */
+	0x000000C0, /* [0x0147] */
+	0x48E09801, /* [0x0148] */
+	0x00000001, /* [0x0149] */
+	0x48E00401, /* [0x014A] */
+	0x00000000, /* [0x014B] */
+	0x480CDD02, /* [0x014C] */
+	0x00200074, /* [0x014D] */
+	0x00000000, /* [0x014E] */
+	0x40E15285, /* [0x014F] */
+	0x00000A30, /* [0x0150] */
+	0x00000008, /* [0x0151] */
+	0x00000100, /* [0x0152] */
+	0x00000000, /* [0x0153] */
+	0x00000000, /* [0x0154] */
+	0x48E14101, /* [0x0155] */
+	0x0000C008, /* [0x0156] */
+	0x40E0A001, /* [0x0157] */
+	0x00000008, /* [0x0158] */
+	0x40E28001, /* [0x0159] */
+	0x00010004, /* [0x015A] */
+	0x40E38401, /* [0x015B] */
+	0x00000404, /* [0x015C] */
+	0x40E78501, /* [0x015D] */
+	0x0000003F, /* [0x015E] */
+	0x48E5D301, /* [0x015F] */
+	0x00000030, /* [0x0160] */
+	0x48E5CB01, /* [0x0161] */
+	0x00000100, /* [0x0162] */
+	0x40E5CA01, /* [0x0163] */
+	0x001F9F81, /* [0x0164] */
+	0x40E14601, /* [0x0165] */
+	0x00000001, /* [0x0166] */
+	0x40E38E01, /* [0x0167] */
+	0x00000000, /* [0x0168] */
+	0x40E28A01, /* [0x0169] */
+	0x00000000, /* [0x016A] */
+	0x48E1A901, /* [0x016B] */
+	0xFFFF0100, /* [0x016C] */
+	0x40E5C901, /* [0x016D] */
+	0x00000100, /* [0x016E] */
+	0x40E76401, /* [0x016F] */
+	0x00000000, /* [0x0170] */
+	0x48E09401, /* [0x0171] */
+	0x00000000, /* [0x0172] */
+	0x40E1B001, /* [0x0173] */
+	0x00000000, /* [0x0174] */
+	0x48E1B101, /* [0x0175] */
+	0x00000006, /* [0x0176] */
+	0x48E40001, /* [0x0177] */
+	0x00000001, /* [0x0178] */
+	0x48E40A04, /* [0x0179] */
+	0x00000000, /* [0x017A] */
+	0x00000000, /* [0x017B] */
+	0x00000078, /* [0x017C] */
+	0x00000008, /* [0x017D] */
+	0x40E48A02, /* [0x017E] */
+	0xC6700000, /* [0x017F] */
+	0x00000001, /* [0x0180] */
+	0x48E4CA01, /* [0x0181] */
+	0x0000000F, /* [0x0182] */
+	0x48E10001, /* [0x0183] */
+	0x00000008, /* [0x0184] */
+	0x48E0A101, /* [0x0185] */
+	0x00000000, /* [0x0186] */
+	0x40E10185, /* [0x0187] */
+	0x00000000, /* [0x0188] */
+	0x00000000, /* [0x0189] */
+	0x00000001, /* [0x018A] */
+	0x00000000, /* [0x018B] */
+	0x00000000, /* [0x018C] */
+	0x70230001, /* [0x018D] */
+	0x00000000, /* [0x018E] */
+	0x70388003, /* [0x018F] */
+	0x00200984, /* [0x0190] */
+	0x00000001, /* [0x0191] */
+	0x00000003, /* [0x0192] */
+	0x70380007, /* [0x0193] */
+	0x00200504, /* [0x0194] */
+	0x00000001, /* [0x0195] */
+	0x00000003, /* [0x0196] */
+	0x00000000, /* [0x0197] */
+	0x00000000, /* [0x0198] */
+	0x00000000, /* [0x0199] */
+	0x00000006, /* [0x019A] */
+	0x70460004, /* [0x019B] */
+	0x00000004, /* [0x019C] */
+	0x00000000, /* [0x019D] */
+	0x00000000, /* [0x019E] */
+	0x00000000, /* [0x019F] */
+	0x70268000, /* [0x01A0] */
+	0x70A88003, /* [0x01A1] */
+	0x00200984, /* [0x01A2] */
+	0x00000000, /* [0x01A3] */
+	0x00000000, /* [0x01A4] */
+	0x70460004, /* [0x01A5] */
+	0x00000004, /* [0x01A6] */
+	0x00000000, /* [0x01A7] */
+	0x00000000, /* [0x01A8] */
+	0x00000001, /* [0x01A9] */
+	0x70268000, /* [0x01AA] */
+	0x70298006, /* [0x01AB] */
+	0x00200504, /* [0x01AC] */
+	0x00000000, /* [0x01AD] */
+	0x00000000, /* [0x01AE] */
+	0x00000006, /* [0x01AF] */
+	0x00000000, /* [0x01B0] */
+	0x00000000, /* [0x01B1] */
+	0x40E40801, /* [0x01B2] */
+	0x0000000D, /* [0x01B3] */
+	0x48E40901, /* [0x01B4] */
+	0x00000000, /* [0x01B5] */
+	0x70388003, /* [0x01B6] */
+	0x00200984, /* [0x01B7] */
+	0x00000001, /* [0x01B8] */
+	0x00000003, /* [0x01B9] */
+	0x00000000, /* [0x01BA] */
+	0x00000000, /* [0x01BB] */
+	0x00000000, /* [0x01BC] */
+	0x00000000, /* [0x01BD] */
+	0x00000000, /* [0x01BE] */
+	0x00000000, /* [0x01BF] */
+	0x70EA0001, /* [0x01C0] */
+	0x00000000, /* [0x01C1] */
+	0x40E78A01, /* [0x01C2] */
+	0x000FFFFF, /* [0x01C3] */
+	0x40E09001, /* [0x01C4] */
+	0x00000000, /* [0x01C5] */
+	0x40E00501, /* [0x01C6] */
+	0x00000000, /* [0x01C7] */
+	0x40E00001, /* [0x01C8] */
+	0x00000181, /* [0x01C9] */
+	0x48E10001, /* [0x01CA] */
+	0x00000000, /* [0x01CB] */
+	0x40E21385, /* [0x01CC] */
+	0x00000004, /* [0x01CD] */
+	0x00000000, /* [0x01CE] */
+	0x00000000, /* [0x01CF] */
+	0x00000001, /* [0x01D0] */
+	0x00000001, /* [0x01D1] */
+	0x40E21C01, /* [0x01D2] */
+	0x00000000, /* [0x01D3] */
+	0x40E21001, /* [0x01D4] */
+	0x00000000, /* [0x01D5] */
+	0x70460004, /* [0x01D6] */
+	0x0000001E, /* [0x01D7] */
+	0x00000000, /* [0x01D8] */
+	0x00000000, /* [0x01D9] */
+	0x00000001, /* [0x01DA] */
+	0x00000000, /* [0x01DB] */
+	0x00000000, /* [0x01DC] */
+	0x00000000, /* [0x01DD] */
+	0x00000000, /* [0x01DE] */
+	0x00000000, /* [0x01DF] */
+	0x40E78A01, /* [0x01E0] */
+	0x020FFFFF, /* [0x01E1] */
+	0x48E78B85, /* [0x01E2] */
+	0x00000001, /* [0x01E3] */
+	0x00003F05, /* [0x01E4] */
+	0x00003F04, /* [0x01E5] */
+	0x00003F04, /* [0x01E6] */
+	0x00003F04, /* [0x01E7] */
+	0x48E79001, /* [0x01E8] */
+	0x00000000, /* [0x01E9] */
+	0x40E79101, /* [0x01EA] */
+	0x00000002, /* [0x01EB] */
+	0x40E79201, /* [0x01EC] */
+	0x00000002, /* [0x01ED] */
+	0x40E58485, /* [0x01EE] */
+	0x00000001, /* [0x01EF] */
+	0x00003F05, /* [0x01F0] */
+	0x00003F04, /* [0x01F1] */
+	0x00003F04, /* [0x01F2] */
+	0x00003F04, /* [0x01F3] */
+	0x48E58901, /* [0x01F4] */
+	0x00000000, /* [0x01F5] */
+	0x48E7C302, /* [0x01F6] */
+	0x00000002, /* [0x01F7] */
+	0x00000001, /* [0x01F8] */
+	0x48E7D702, /* [0x01F9] */
+	0x00000002, /* [0x01FA] */
+	0x00000001, /* [0x01FB] */
+	0x40E7C802, /* [0x01FC] */
+	0x00000000, /* [0x01FD] */
+	0x00000000, /* [0x01FE] */
+	0x40E7CD02, /* [0x01FF] */
+	0x00000000, /* [0x0200] */
+	0x00000000, /* [0x0201] */
+	0x48E7D202, /* [0x0202] */
+	0x00000000, /* [0x0203] */
+	0x00000000, /* [0x0204] */
+	0x40E7DC02, /* [0x0205] */
+	0x00000000, /* [0x0206] */
+	0x00000000, /* [0x0207] */
+	0x48E38901, /* [0x0208] */
+	0x00000000, /* [0x0209] */
+	0x48E29A01, /* [0x020A] */
+	0x00FFFF00, /* [0x020B] */
+	0x48E00101, /* [0x020C] */
+	0x00000000, /* [0x020D] */
+	0x40E29D01, /* [0x020E] */
+	0x0000FF00, /* [0x020F] */
+	0x40E59001, /* [0x0210] */
+	0x00000406, /* [0x0211] */
+	0x48E59201, /* [0x0212] */
+	0x00000001, /* [0x0213] */
+	0x40E59301, /* [0x0214] */
+	0x00000F00, /* [0x0215] */
+	0x40E5A301, /* [0x0216] */
+	0x00000000, /* [0x0217] */
+	0x48E38501, /* [0x0218] */
+	0x00000000, /* [0x0219] */
+	0x00000000, /* [0x021A] */
+	0x00000000, /* [0x021B] */
+	0x00000000, /* [0x021C] */
+	0x00000000, /* [0x021D] */
+	0x00000000, /* [0x021E] */
+	0x00000000, /* [0x021F] */
+	0x48210001, /* [0x0220] */
+	0x86000000, /* [0x0221] */
+	0x40218001, /* [0x0222] */
+	0x86000000, /* [0x0223] */
+	0x40211089, /* [0x0224] */
+	0x00001331, /* [0x0225] */
+	0x00000000, /* [0x0226] */
+	0x00000000, /* [0x0227] */
+	0x00020001, /* [0x0228] */
+	0x00000000, /* [0x0229] */
+	0x00000000, /* [0x022A] */
+	0x00000000, /* [0x022B] */
+	0x00000000, /* [0x022C] */
+	0x00000000, /* [0x022D] */
+	0x48218201, /* [0x022E] */
+	0x00001331, /* [0x022F] */
+	0x40214383, /* [0x0230] */
+	0x00000000, /* [0x0231] */
+	0x00000000, /* [0x0232] */
+	0x00000001, /* [0x0233] */
+	0x40210789, /* [0x0234] */
+	0x00000021, /* [0x0235] */
+	0x00000000, /* [0x0236] */
+	0x00000000, /* [0x0237] */
+	0x00020001, /* [0x0238] */
+	0x00000000, /* [0x0239] */
+	0x00000000, /* [0x023A] */
+	0x00000000, /* [0x023B] */
+	0x00000000, /* [0x023C] */
+	0x00000000, /* [0x023D] */
+	0x48218101, /* [0x023E] */
+	0x00000021, /* [0x023F] */
+	0x48218401, /* [0x0240] */
+	0x00000001, /* [0x0241] */
+	0x702C8005, /* [0x0242] */
+	0x00000002, /* [0x0243] */
+	0x00000000, /* [0x0244] */
+	0x00010001, /* [0x0245] */
+	0x00000000, /* [0x0246] */
+	0x00010001, /* [0x0247] */
+	0x70B00023, /* [0x0248] */
+	0x00600000, /* [0x0249] */
+	0x00000000, /* [0x024A] */
+	0x00000000, /* [0x024B] */
+	0x00000000, /* [0x024C] */
+	0x03000000, /* [0x024D] */
+	0x00000000, /* [0x024E] */
+	0x00000000, /* [0x024F] */
+	0x00000000, /* [0x0250] */
+	0x00000000, /* [0x0251] */
+	0x00000000, /* [0x0252] */
+	0x00000000, /* [0x0253] */
+	0x00000000, /* [0x0254] */
+	0x00000000, /* [0x0255] */
+	0x00000000, /* [0x0256] */
+	0x00000000, /* [0x0257] */
+	0x00000000, /* [0x0258] */
+	0x00000000, /* [0x0259] */
+	0x00000000, /* [0x025A] */
+	0x00000000, /* [0x025B] */
+	0x00000000, /* [0x025C] */
+	0x00000000, /* [0x025D] */
+	0x00000000, /* [0x025E] */
+	0x00000000, /* [0x025F] */
+	0x00000000, /* [0x0260] */
+	0x00000000, /* [0x0261] */
+	0x00000000, /* [0x0262] */
+	0x00000000, /* [0x0263] */
+	0x00000000, /* [0x0264] */
+	0x00000000, /* [0x0265] */
+	0x00000000, /* [0x0266] */
+	0x00000000, /* [0x0267] */
+	0x00000000, /* [0x0268] */
+	0x00000000, /* [0x0269] */
+	0x00000000, /* [0x026A] */
+	0x00000000, /* [0x026B] */
+	0x40E09301, /* [0x026C] */
+	0x00000000, /* [0x026D] */
+	0x40E38D01, /* [0x026E] */
+	0x00000000, /* [0x026F] */
+	0x40E29801, /* [0x0270] */
+	0x0000FFFF, /* [0x0271] */
+	0x48E28201, /* [0x0272] */
+	0xEAEAEAEA, /* [0x0273] */
+	0x40E29404, /* [0x0274] */
+	0xFFFFFFFF, /* [0x0275] */
+	0xFFFFFFFF, /* [0x0276] */
+	0xFFFFFFFF, /* [0x0277] */
+	0xFFFFFFFF, /* [0x0278] */
+	0x40E5DB01, /* [0x0279] */
+	0x00000000, /* [0x027A] */
+	0x48E14701, /* [0x027B] */
+	0x0000000F, /* [0x027C] */
+	0x70B00023, /* [0x027D] */
+	0x00700000, /* [0x027E] */
+	0x00000000, /* [0x027F] */
+	0x00000000, /* [0x0280] */
+	0x00003C00, /* [0x0281] */
+	0x20400000, /* [0x0282] */
+	0x00000000, /* [0x0283] */
+	0x20400001, /* [0x0284] */
+	0x00000000, /* [0x0285] */
+	0x20400002, /* [0x0286] */
+	0x00003C00, /* [0x0287] */
+	0x20400003, /* [0x0288] */
+	0x00000000, /* [0x0289] */
+	0x03000000, /* [0x028A] */
+	0x00000000, /* [0x028B] */
+	0x00000000, /* [0x028C] */
+	0x00000000, /* [0x028D] */
+	0x00000000, /* [0x028E] */
+	0x00000000, /* [0x028F] */
+	0x00000000, /* [0x0290] */
+	0x00000000, /* [0x0291] */
+	0x00000000, /* [0x0292] */
+	0x00000000, /* [0x0293] */
+	0x00000000, /* [0x0294] */
+	0x00000000, /* [0x0295] */
+	0x00000000, /* [0x0296] */
+	0x00000000, /* [0x0297] */
+	0x00000000, /* [0x0298] */
+	0x00000000, /* [0x0299] */
+	0x00000000, /* [0x029A] */
+	0x00000000, /* [0x029B] */
+	0x00000000, /* [0x029C] */
+	0x00000000, /* [0x029D] */
+	0x00000000, /* [0x029E] */
+	0x00000000, /* [0x029F] */
+	0x00000000, /* [0x02A0] */
+};
+
+/* Fixups for the IBs in _a5xx_critical_pkts_mem03 */
+static const struct adreno_critical_fixup critical_pkt_mem03_fixups[] = {
+	{ 2, 3, CRITICAL_PACKET3, 0x0780 },
+	{ 6, 7, CRITICAL_PACKET2, 0x0000 },
+	{ 98, 99, CRITICAL_PACKET1, 0x0000 },
+	{ 112, 113, CRITICAL_PACKET1, 0x0480 },
+	{ 115, 116, CRITICAL_PACKET1, 0x0400 },
+	{ 126, 127, CRITICAL_PACKET1, 0x0080 },
+	{ 131, 132, CRITICAL_PACKET2, 0x0108 },
+	{ 137, 138, CRITICAL_PACKET1, 0x00A0 },
+	{ 141, 142, CRITICAL_PACKET2, 0x0108 },
+	{ 147, 148, CRITICAL_PACKET1, 0x0080 },
+	{ 150, 151, CRITICAL_PACKET1, 0x00C0 },
+	{ 174, 175, CRITICAL_PACKET3, 0x0780 },
+	{ 378, 379, CRITICAL_PACKET1, 0x0000 },
+	{ 392, 393, CRITICAL_PACKET1, 0x0480 },
+	{ 395, 396, CRITICAL_PACKET1, 0x0400 },
+	{ 408, 409, CRITICAL_PACKET1, 0x0080 },
+	{ 413, 414, CRITICAL_PACKET2, 0x0108 },
+	{ 419, 420, CRITICAL_PACKET1, 0x00A0 },
+	{ 423, 424, CRITICAL_PACKET2, 0x0108 },
+	{ 429, 430, CRITICAL_PACKET1, 0x0080 },
+	{ 432, 433, CRITICAL_PACKET1, 0x00C0 },
+	{ 462, 463, CRITICAL_PACKET0, 0x0700 },
+	{ 472, 473, CRITICAL_PACKET2, 0x0110 },
+	{ 550, 551, CRITICAL_PACKET1, 0x0500 },
+	{ 561, 562, CRITICAL_PACKET1, 0x0600 },
+	{ 566, 567, CRITICAL_PACKET1, 0x0700 },
+};
diff --git a/drivers/gpu/msm/adreno_a5xx_preempt.c b/drivers/gpu/msm/adreno_a5xx_preempt.c
new file mode 100644
index 0000000..43302a0
--- /dev/null
+++ b/drivers/gpu/msm/adreno_a5xx_preempt.c
@@ -0,0 +1,595 @@
+/* Copyright (c) 2014-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include "adreno.h"
+#include "adreno_a5xx.h"
+#include "a5xx_reg.h"
+#include "adreno_trace.h"
+#include "adreno_pm4types.h"
+
+#define PREEMPT_RECORD(_field) \
+		offsetof(struct a5xx_cp_preemption_record, _field)
+
+#define PREEMPT_SMMU_RECORD(_field) \
+		offsetof(struct a5xx_cp_smmu_info, _field)
+
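+/*
+ * _update_wptr() - Resync the hardware CP_RB_WPTR with the current
+ * ringbuffer's software wptr and, if anything was submitted while preemption
+ * was in flight (or if requested), push out the dispatch queue expiry time.
+ */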
+static void _update_wptr(struct adreno_device *adreno_dev, bool reset_timer)
+{
+	struct adreno_ringbuffer *rb = adreno_dev->cur_rb;
+	unsigned int wptr;
+	unsigned long flags;
+
+	spin_lock_irqsave(&rb->preempt_lock, flags);
+
+	adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_WPTR, &wptr);
+
+	if (wptr != rb->wptr) {
+		adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_WPTR,
+			rb->wptr);
+		/*
+		 * In case something got submitted while preemption was
+		 * ongoing, reset the timer.
+		 */
+		reset_timer = true;
+	}
+
+	if (reset_timer)
+		rb->dispatch_q.expires = jiffies +
+			msecs_to_jiffies(adreno_drawobj_timeout);
+
+	spin_unlock_irqrestore(&rb->preempt_lock, flags);
+}
+
+static inline bool adreno_move_preempt_state(struct adreno_device *adreno_dev,
+	enum adreno_preempt_states old, enum adreno_preempt_states new)
+{
+	return (atomic_cmpxchg(&adreno_dev->preempt.state, old, new) == old);
+}
+
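+/*
+ * _a5xx_preemption_done() - Finish a completed preemption switch: verify that
+ * CP_PREEMPT has cleared, promote next_rb to cur_rb, resync the wptr and the
+ * dispatcher timer and return to the ADRENO_PREEMPT_NONE state. A non-zero
+ * status is treated as a preemption fault.
+ */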
+static void _a5xx_preemption_done(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int status;
+
+	/*
+	 * In the very unlikely case that the power is off, do nothing - the
+	 * state will be reset on power up and everybody will be happy
+	 */
+
+	if (!kgsl_state_is_awake(device))
+		return;
+
+	adreno_readreg(adreno_dev, ADRENO_REG_CP_PREEMPT, &status);
+
+	if (status != 0) {
+		KGSL_DRV_ERR(device,
+			"Preemption not complete: status=%X cur=%d R/W=%X/%X next=%d R/W=%X/%X\n",
+			status, adreno_dev->cur_rb->id,
+			adreno_get_rptr(adreno_dev->cur_rb),
+			adreno_dev->cur_rb->wptr, adreno_dev->next_rb->id,
+			adreno_get_rptr(adreno_dev->next_rb),
+			adreno_dev->next_rb->wptr);
+
+		/* Set a fault and restart */
+		adreno_set_gpu_fault(adreno_dev, ADRENO_PREEMPT_FAULT);
+		adreno_dispatcher_schedule(device);
+
+		return;
+	}
+
+	del_timer_sync(&adreno_dev->preempt.timer);
+
+	trace_adreno_preempt_done(adreno_dev->cur_rb, adreno_dev->next_rb);
+
+	/* Clean up all the bits */
+	adreno_dev->prev_rb = adreno_dev->cur_rb;
+	adreno_dev->cur_rb = adreno_dev->next_rb;
+	adreno_dev->next_rb = NULL;
+
+	/* Update the wptr for the new command queue */
+	_update_wptr(adreno_dev, true);
+
+	/* Update the dispatcher timer for the new command queue */
+	mod_timer(&adreno_dev->dispatcher.timer,
+		adreno_dev->cur_rb->dispatch_q.expires);
+
+	/* Clear the preempt state */
+	adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
+}
+
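+/*
+ * _a5xx_preemption_fault() - Handle a preemption that did not complete in
+ * time. Re-check CP_PREEMPT once in case it finished late, otherwise log the
+ * ringbuffer state, set ADRENO_PREEMPT_FAULT and let the dispatcher recover.
+ */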
+static void _a5xx_preemption_fault(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int status;
+
+	/*
+	 * If the power is on check the preemption status one more time - if it
+	 * was successful then just transition to the complete state
+	 */
+	if (kgsl_state_is_awake(device)) {
+		adreno_readreg(adreno_dev, ADRENO_REG_CP_PREEMPT, &status);
+
+		if (status == 0) {
+			adreno_set_preempt_state(adreno_dev,
+				ADRENO_PREEMPT_COMPLETE);
+
+			adreno_dispatcher_schedule(device);
+			return;
+		}
+	}
+
+	KGSL_DRV_ERR(device,
+		"Preemption timed out: cur=%d R/W=%X/%X, next=%d R/W=%X/%X\n",
+		adreno_dev->cur_rb->id,
+		adreno_get_rptr(adreno_dev->cur_rb), adreno_dev->cur_rb->wptr,
+		adreno_dev->next_rb->id,
+		adreno_get_rptr(adreno_dev->next_rb),
+		adreno_dev->next_rb->wptr);
+
+	adreno_set_gpu_fault(adreno_dev, ADRENO_PREEMPT_FAULT);
+	adreno_dispatcher_schedule(device);
+}
+
+static void _a5xx_preemption_worker(struct work_struct *work)
+{
+	struct adreno_preemption *preempt = container_of(work,
+		struct adreno_preemption, work);
+	struct adreno_device *adreno_dev = container_of(preempt,
+		struct adreno_device, preempt);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	/* Need to take the mutex to make sure that the power stays on */
+	mutex_lock(&device->mutex);
+
+	if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_FAULTED))
+		_a5xx_preemption_fault(adreno_dev);
+
+	mutex_unlock(&device->mutex);
+}
+
+static void _a5xx_preemption_timer(unsigned long data)
+{
+	struct adreno_device *adreno_dev = (struct adreno_device *) data;
+
+	/* We should only be here from a triggered state */
+	if (!adreno_move_preempt_state(adreno_dev,
+		ADRENO_PREEMPT_TRIGGERED, ADRENO_PREEMPT_FAULTED))
+		return;
+
+	/* Schedule the worker to take care of the details */
+	queue_work(system_unbound_wq, &adreno_dev->preempt.work);
+}
+
+/* Find the highest priority active ringbuffer */
+static struct adreno_ringbuffer *a5xx_next_ringbuffer(
+		struct adreno_device *adreno_dev)
+{
+	struct adreno_ringbuffer *rb;
+	unsigned long flags;
+	unsigned int i;
+
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+		bool empty;
+
+		spin_lock_irqsave(&rb->preempt_lock, flags);
+		empty = adreno_rb_empty(rb);
+		spin_unlock_irqrestore(&rb->preempt_lock, flags);
+
+		if (!empty)
+			return rb;
+	}
+
+	return NULL;
+}
+
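+/*
+ * a5xx_preemption_trigger() - Start a new preemption switch if one is not
+ * already in flight: pick the highest priority non-empty ringbuffer, write
+ * its pagetable info to the SMMU record and its preemption record address to
+ * the CP, then write CP_PREEMPT and arm the preemption watchdog timer.
+ */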
+void a5xx_preemption_trigger(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_iommu *iommu = KGSL_IOMMU_PRIV(device);
+	struct adreno_ringbuffer *next;
+	uint64_t ttbr0;
+	unsigned int contextidr;
+	unsigned long flags;
+
+	/* Put ourselves into a possible trigger state */
+	if (!adreno_move_preempt_state(adreno_dev,
+		ADRENO_PREEMPT_NONE, ADRENO_PREEMPT_START))
+		return;
+
+	/* Get the next ringbuffer to preempt in */
+	next = a5xx_next_ringbuffer(adreno_dev);
+
+	/*
+	 * Nothing to do if every ringbuffer is empty or if the current
+	 * ringbuffer is the only active one
+	 */
+	if (next == NULL || next == adreno_dev->cur_rb) {
+		/*
+		 * Update any critical things that might have been skipped while
+		 * we were looking for a new ringbuffer
+		 */
+
+		if (next != NULL) {
+			_update_wptr(adreno_dev, false);
+
+			mod_timer(&adreno_dev->dispatcher.timer,
+				adreno_dev->cur_rb->dispatch_q.expires);
+		}
+
+		adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
+		return;
+	}
+
+	/* Turn off the dispatcher timer */
+	del_timer(&adreno_dev->dispatcher.timer);
+
+	/*
+	 * This is the most critical section - we need to take care not to race
+	 * until we have programmed the CP for the switch
+	 */
+
+	spin_lock_irqsave(&next->preempt_lock, flags);
+
+	/*
+	 * Get the pagetable from the pagetable info.
+	 * The pagetable_desc is allocated and mapped at probe time, and
+	 * preemption_desc at init time, so no need to check if
+	 * sharedmem accesses to these memdescs succeed.
+	 */
+	kgsl_sharedmem_readq(&next->pagetable_desc, &ttbr0,
+		PT_INFO_OFFSET(ttbr0));
+	kgsl_sharedmem_readl(&next->pagetable_desc, &contextidr,
+		PT_INFO_OFFSET(contextidr));
+
+	kgsl_sharedmem_writel(device, &next->preemption_desc,
+		PREEMPT_RECORD(wptr), next->wptr);
+
+	spin_unlock_irqrestore(&next->preempt_lock, flags);
+
+	/* And write it to the smmu info */
+	kgsl_sharedmem_writeq(device, &iommu->smmu_info,
+		PREEMPT_SMMU_RECORD(ttbr0), ttbr0);
+	kgsl_sharedmem_writel(device, &iommu->smmu_info,
+		PREEMPT_SMMU_RECORD(context_idr), contextidr);
+
+	kgsl_regwrite(device, A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_LO,
+		lower_32_bits(next->preemption_desc.gpuaddr));
+	kgsl_regwrite(device, A5XX_CP_CONTEXT_SWITCH_RESTORE_ADDR_HI,
+		upper_32_bits(next->preemption_desc.gpuaddr));
+
+	adreno_dev->next_rb = next;
+
+	/* Start the timer to detect a stuck preemption */
+	mod_timer(&adreno_dev->preempt.timer,
+		jiffies + msecs_to_jiffies(ADRENO_PREEMPT_TIMEOUT));
+
+	trace_adreno_preempt_trigger(adreno_dev->cur_rb, adreno_dev->next_rb);
+
+	adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_TRIGGERED);
+
+	/* Trigger the preemption */
+	adreno_writereg(adreno_dev, ADRENO_REG_CP_PREEMPT, 1);
+}
+
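+/*
+ * a5xx_preempt_callback() - Called on the CP preemption interrupt. Confirm
+ * that CP_PREEMPT has cleared, switch the cur_rb/next_rb bookkeeping, resync
+ * the wptr and dispatcher timer and immediately try to trigger the next
+ * pending preemption.
+ */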
+void a5xx_preempt_callback(struct adreno_device *adreno_dev, int bit)
+{
+	unsigned int status;
+
+	if (!adreno_move_preempt_state(adreno_dev,
+		ADRENO_PREEMPT_TRIGGERED, ADRENO_PREEMPT_PENDING))
+		return;
+
+	adreno_readreg(adreno_dev, ADRENO_REG_CP_PREEMPT, &status);
+
+	if (status != 0) {
+		KGSL_DRV_ERR(KGSL_DEVICE(adreno_dev),
+			"preempt interrupt with non-zero status: %X\n", status);
+
+		/*
+		 * Under the assumption that this is a race between the
+		 * interrupt and the register, schedule the worker to clean up.
+		 * If the status still hasn't resolved itself by the time we get
+		 * there then we have to assume something bad happened
+		 */
+		adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_COMPLETE);
+		adreno_dispatcher_schedule(KGSL_DEVICE(adreno_dev));
+		return;
+	}
+
+	del_timer(&adreno_dev->preempt.timer);
+
+	trace_adreno_preempt_done(adreno_dev->cur_rb,
+		adreno_dev->next_rb);
+
+	adreno_dev->prev_rb = adreno_dev->cur_rb;
+	adreno_dev->cur_rb = adreno_dev->next_rb;
+	adreno_dev->next_rb = NULL;
+
+	/* Update the wptr if it changed while preemption was ongoing */
+	_update_wptr(adreno_dev, true);
+
+	/* Update the dispatcher timer for the new command queue */
+	mod_timer(&adreno_dev->dispatcher.timer,
+		adreno_dev->cur_rb->dispatch_q.expires);
+
+	adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
+
+	a5xx_preemption_trigger(adreno_dev);
+}
+
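+/*
+ * a5xx_preemption_schedule() - Called from the dispatcher to finish a
+ * completed preemption (if any) and attempt to trigger a new one, with the
+ * device mutex held so the power state cannot change underneath us.
+ */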
+void a5xx_preemption_schedule(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (!adreno_is_preemption_enabled(adreno_dev))
+		return;
+
+	mutex_lock(&device->mutex);
+
+	if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_COMPLETE))
+		_a5xx_preemption_done(adreno_dev);
+
+	a5xx_preemption_trigger(adreno_dev);
+
+	mutex_unlock(&device->mutex);
+}
+
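+/*
+ * a5xx_preemption_pre_ibsubmit() - Emit the PM4 commands that precede an IB
+ * submission: select the preemption style for the context, program the
+ * context switch save address to this ringbuffer's preemption record and
+ * enable yield points. Returns the number of dwords written to cmds.
+ */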
+unsigned int a5xx_preemption_pre_ibsubmit(
+			struct adreno_device *adreno_dev,
+			struct adreno_ringbuffer *rb,
+			unsigned int *cmds, struct kgsl_context *context)
+{
+	unsigned int *cmds_orig = cmds;
+	uint64_t gpuaddr = rb->preemption_desc.gpuaddr;
+	unsigned int preempt_style = 0;
+
+	if (context) {
+		/*
+		 * Preemption from secure to non-secure needs the zap shader
+		 * to be run to clear all secure content. The CP does not know
+		 * during preemption whether it is switching between secure
+		 * and non-secure contexts, so restrict secure contexts to
+		 * ringbuffer level preemption.
+		 */
+		if (context->flags & KGSL_CONTEXT_SECURE)
+			preempt_style = KGSL_CONTEXT_PREEMPT_STYLE_RINGBUFFER;
+		else
+			preempt_style = ADRENO_PREEMPT_STYLE(context->flags);
+	}
+
+	/*
+	 * CP_PREEMPT_ENABLE_GLOBAL (global preemption) can only be set by the
+	 * KMD in the ringbuffer:
+	 * 1) Set global preemption to 0x0 to disable global preemption.
+	 *    Only RB level preemption is allowed in this mode.
+	 * 2) Set global preemption to defer (0x2) for fine-grain preemption.
+	 *    When global preemption is set to defer (0x2),
+	 *    CP_PREEMPT_ENABLE_LOCAL (local preemption) determines the
+	 *    preemption point. Local preemption can be enabled by both the
+	 *    UMD (within an IB) and the KMD.
+	 */
+	*cmds++ = cp_type7_packet(CP_PREEMPT_ENABLE_GLOBAL, 1);
+	*cmds++ = ((preempt_style == KGSL_CONTEXT_PREEMPT_STYLE_FINEGRAIN)
+				? 2 : 0);
+
+	/* Turn CP protection OFF */
+	*cmds++ = cp_type7_packet(CP_SET_PROTECTED_MODE, 1);
+	*cmds++ = 0;
+
+	/*
+	 * CP during context switch will save context switch info to
+	 * a5xx_cp_preemption_record pointed by CONTEXT_SWITCH_SAVE_ADDR
+	 */
+	*cmds++ = cp_type4_packet(A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_LO, 1);
+	*cmds++ = lower_32_bits(gpuaddr);
+	*cmds++ = cp_type4_packet(A5XX_CP_CONTEXT_SWITCH_SAVE_ADDR_HI, 1);
+	*cmds++ = upper_32_bits(gpuaddr);
+
+	/* Turn CP protection ON */
+	*cmds++ = cp_type7_packet(CP_SET_PROTECTED_MODE, 1);
+	*cmds++ = 1;
+
+	/*
+	 * Enable local preemption for finegrain preemption in case of
+	 * a misbehaving IB
+	 */
+	*cmds++ = cp_type7_packet(CP_PREEMPT_ENABLE_LOCAL, 1);
+	*cmds++ = (preempt_style == KGSL_CONTEXT_PREEMPT_STYLE_FINEGRAIN)
+				? 1 : 0;
+
+	/* Enable CP_CONTEXT_SWITCH_YIELD packets in the IB2s */
+	*cmds++ = cp_type7_packet(CP_YIELD_ENABLE, 1);
+	*cmds++ = 2;
+
+	return (unsigned int) (cmds - cmds_orig);
+}
+
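+/*
+ * a5xx_preemption_yield_enable() - Emit the commands that clear the render
+ * mode (SRM) and write CP_YIELD_ENABLE after an IB. Always emits 8 dwords.
+ */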
+int a5xx_preemption_yield_enable(unsigned int *cmds)
+{
+	/*
+	 * SRM -- set render mode (e.g. binning, direct render, etc.).
+	 * SRM is usually set by the UMD at the start of an IB to tell the CP
+	 * the type of preemption.
+	 * The KMD needs to set SRM to NULL to indicate to the CP that
+	 * rendering by the IB is done.
+	 */
+	*cmds++ = cp_type7_packet(CP_SET_RENDER_MODE, 5);
+	*cmds++ = 0;
+	*cmds++ = 0;
+	*cmds++ = 0;
+	*cmds++ = 0;
+	*cmds++ = 0;
+
+	*cmds++ = cp_type7_packet(CP_YIELD_ENABLE, 1);
+	*cmds++ = 1;
+
+	return 8;
+}
+
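+/*
+ * a5xx_preemption_post_ibsubmit() - Emit the CP_CONTEXT_SWITCH_YIELD packet
+ * that follows an IB submission, requesting an interrupt when a preemption
+ * completes. Returns the number of dwords written to cmds.
+ */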
+unsigned int a5xx_preemption_post_ibsubmit(struct adreno_device *adreno_dev,
+	unsigned int *cmds)
+{
+	int dwords = 0;
+
+	cmds[dwords++] = cp_type7_packet(CP_CONTEXT_SWITCH_YIELD, 4);
+	/* Write NULL to the address to skip the data write */
+	dwords += cp_gpuaddr(adreno_dev, &cmds[dwords], 0x0);
+	cmds[dwords++] = 1;
+	/* generate interrupt on preemption completion */
+	cmds[dwords++] = 1;
+
+	return dwords;
+}
+
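+/*
+ * a5xx_preemption_start() - Reprogram the preemption state after power up:
+ * reset the software state machine, reinitialize the SMMU info record with
+ * the default pagetable and point the CP at it, then clear the rptr/wptr in
+ * every ringbuffer's preemption record.
+ */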
+void a5xx_preemption_start(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_iommu *iommu = KGSL_IOMMU_PRIV(device);
+	struct adreno_ringbuffer *rb;
+	unsigned int i;
+
+	if (!adreno_is_preemption_enabled(adreno_dev))
+		return;
+
+	/* Force the state to be clear */
+	adreno_set_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE);
+
+	/* smmu_info is allocated and mapped in a5xx_preemption_iommu_init */
+	kgsl_sharedmem_writel(device, &iommu->smmu_info,
+		PREEMPT_SMMU_RECORD(magic), A5XX_CP_SMMU_INFO_MAGIC_REF);
+	kgsl_sharedmem_writeq(device, &iommu->smmu_info,
+		PREEMPT_SMMU_RECORD(ttbr0), MMU_DEFAULT_TTBR0(device));
+
+	/* The CP doesn't use the asid record, so poison it */
+	kgsl_sharedmem_writel(device, &iommu->smmu_info,
+		PREEMPT_SMMU_RECORD(asid), 0xDECAFBAD);
+	kgsl_sharedmem_writel(device, &iommu->smmu_info,
+		PREEMPT_SMMU_RECORD(context_idr),
+		MMU_DEFAULT_CONTEXTIDR(device));
+
+	adreno_writereg64(adreno_dev,
+			ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_LO,
+			ADRENO_REG_CP_CONTEXT_SWITCH_SMMU_INFO_HI,
+			iommu->smmu_info.gpuaddr);
+
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+		/*
+		 * preemption_desc is allocated and mapped at init time,
+		 * so no need to check sharedmem_writel return value
+		 */
+		kgsl_sharedmem_writel(device, &rb->preemption_desc,
+			PREEMPT_RECORD(rptr), 0);
+		kgsl_sharedmem_writel(device, &rb->preemption_desc,
+			PREEMPT_RECORD(wptr), 0);
+
+		adreno_ringbuffer_set_pagetable(rb,
+			device->mmu.defaultpagetable);
+	}
+}
+
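+/*
+ * a5xx_preemption_ringbuffer_init() - Allocate the per-ringbuffer preemption
+ * (context switch) record and seed it with the magic value, the default RB
+ * control settings, the rptr shadow address, the ringbuffer base and the
+ * per-ringbuffer counter address.
+ */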
+static int a5xx_preemption_ringbuffer_init(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb, uint64_t counteraddr)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	ret = kgsl_allocate_global(device, &rb->preemption_desc,
+		A5XX_CP_CTXRECORD_SIZE_IN_BYTES, 0, KGSL_MEMDESC_PRIVILEGED,
+		"preemption_desc");
+	if (ret)
+		return ret;
+
+	kgsl_sharedmem_writel(device, &rb->preemption_desc,
+		PREEMPT_RECORD(magic), A5XX_CP_CTXRECORD_MAGIC_REF);
+	kgsl_sharedmem_writel(device, &rb->preemption_desc,
+		PREEMPT_RECORD(info), 0);
+	kgsl_sharedmem_writel(device, &rb->preemption_desc,
+		PREEMPT_RECORD(data), 0);
+	kgsl_sharedmem_writel(device, &rb->preemption_desc,
+		PREEMPT_RECORD(cntl), A5XX_CP_RB_CNTL_DEFAULT);
+	kgsl_sharedmem_writel(device, &rb->preemption_desc,
+		PREEMPT_RECORD(rptr), 0);
+	kgsl_sharedmem_writel(device, &rb->preemption_desc,
+		PREEMPT_RECORD(wptr), 0);
+	kgsl_sharedmem_writeq(device, &rb->preemption_desc,
+		PREEMPT_RECORD(rptr_addr), SCRATCH_RPTR_GPU_ADDR(device,
+			rb->id));
+	kgsl_sharedmem_writeq(device, &rb->preemption_desc,
+		PREEMPT_RECORD(rbase), rb->buffer_desc.gpuaddr);
+	kgsl_sharedmem_writeq(device, &rb->preemption_desc,
+		PREEMPT_RECORD(counter), counteraddr);
+
+	return 0;
+}
+
+#ifdef CONFIG_QCOM_KGSL_IOMMU
+static int a5xx_preemption_iommu_init(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_iommu *iommu = KGSL_IOMMU_PRIV(device);
+
+	/* Allocate mem for storing preemption smmu record */
+	return kgsl_allocate_global(device, &iommu->smmu_info, PAGE_SIZE,
+		KGSL_MEMFLAGS_GPUREADONLY, KGSL_MEMDESC_PRIVILEGED,
+		"smmu_info");
+}
+#else
+static int a5xx_preemption_iommu_init(struct adreno_device *adreno_dev)
+{
+	return -ENODEV;
+}
+#endif
+
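+/*
+ * a5xx_preemption_init() - One time preemption setup: require an IOMMU, set
+ * up the fault worker and watchdog timer, allocate the preemption counter
+ * buffer and a preemption record for each ringbuffer, then allocate the SMMU
+ * info record.
+ */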
+int a5xx_preemption_init(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_preemption *preempt = &adreno_dev->preempt;
+	struct adreno_ringbuffer *rb;
+	int ret;
+	unsigned int i;
+	uint64_t addr;
+
+	/* Preemption on the CP side requires an IOMMU */
+	if (kgsl_mmu_get_mmutype(device) != KGSL_MMU_TYPE_IOMMU)
+		return -ENODEV;
+
+	INIT_WORK(&preempt->work, _a5xx_preemption_worker);
+
+	setup_timer(&preempt->timer, _a5xx_preemption_timer,
+		(unsigned long) adreno_dev);
+
+	/* Allocate mem for storing preemption counters */
+	ret = kgsl_allocate_global(device, &preempt->counters,
+		adreno_dev->num_ringbuffers *
+		A5XX_CP_CTXRECORD_PREEMPTION_COUNTER_SIZE, 0, 0,
+		"preemption_counters");
+	if (ret)
+		return ret;
+
+	addr = preempt->counters.gpuaddr;
+
+	/* Allocate mem for storing preemption switch record */
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+		ret = a5xx_preemption_ringbuffer_init(adreno_dev, rb, addr);
+		if (ret)
+			return ret;
+
+		addr += A5XX_CP_CTXRECORD_PREEMPTION_COUNTER_SIZE;
+	}
+
+	return a5xx_preemption_iommu_init(adreno_dev);
+}
diff --git a/drivers/gpu/msm/adreno_a5xx_snapshot.c b/drivers/gpu/msm/adreno_a5xx_snapshot.c
new file mode 100644
index 0000000..9fd7cb4
--- /dev/null
+++ b/drivers/gpu/msm/adreno_a5xx_snapshot.c
@@ -0,0 +1,1103 @@
+/* Copyright (c) 2015-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/io.h>
+#include "kgsl.h"
+#include "adreno.h"
+#include "kgsl_snapshot.h"
+#include "adreno_snapshot.h"
+#include "a5xx_reg.h"
+#include "adreno_a5xx.h"
+
+enum a5xx_rbbm_debbus_id {
+	A5XX_RBBM_DBGBUS_CP          = 0x1,
+	A5XX_RBBM_DBGBUS_RBBM        = 0x2,
+	A5XX_RBBM_DBGBUS_VBIF        = 0x3,
+	A5XX_RBBM_DBGBUS_HLSQ        = 0x4,
+	A5XX_RBBM_DBGBUS_UCHE        = 0x5,
+	A5XX_RBBM_DBGBUS_DPM         = 0x6,
+	A5XX_RBBM_DBGBUS_TESS        = 0x7,
+	A5XX_RBBM_DBGBUS_PC          = 0x8,
+	A5XX_RBBM_DBGBUS_VFDP        = 0x9,
+	A5XX_RBBM_DBGBUS_VPC         = 0xa,
+	A5XX_RBBM_DBGBUS_TSE         = 0xb,
+	A5XX_RBBM_DBGBUS_RAS         = 0xc,
+	A5XX_RBBM_DBGBUS_VSC         = 0xd,
+	A5XX_RBBM_DBGBUS_COM         = 0xe,
+	A5XX_RBBM_DBGBUS_DCOM        = 0xf,
+	A5XX_RBBM_DBGBUS_LRZ         = 0x10,
+	A5XX_RBBM_DBGBUS_A2D_DSP     = 0x11,
+	A5XX_RBBM_DBGBUS_CCUFCHE     = 0x12,
+	A5XX_RBBM_DBGBUS_GPMU        = 0x13,
+	A5XX_RBBM_DBGBUS_RBP         = 0x14,
+	A5XX_RBBM_DBGBUS_HM          = 0x15,
+	A5XX_RBBM_DBGBUS_RBBM_CFG    = 0x16,
+	A5XX_RBBM_DBGBUS_VBIF_CX     = 0x17,
+	A5XX_RBBM_DBGBUS_GPC         = 0x1d,
+	A5XX_RBBM_DBGBUS_LARC        = 0x1e,
+	A5XX_RBBM_DBGBUS_HLSQ_SPTP   = 0x1f,
+	A5XX_RBBM_DBGBUS_RB_0        = 0x20,
+	A5XX_RBBM_DBGBUS_RB_1        = 0x21,
+	A5XX_RBBM_DBGBUS_RB_2        = 0x22,
+	A5XX_RBBM_DBGBUS_RB_3        = 0x23,
+	A5XX_RBBM_DBGBUS_CCU_0       = 0x28,
+	A5XX_RBBM_DBGBUS_CCU_1       = 0x29,
+	A5XX_RBBM_DBGBUS_CCU_2       = 0x2a,
+	A5XX_RBBM_DBGBUS_CCU_3       = 0x2b,
+	A5XX_RBBM_DBGBUS_A2D_RAS_0   = 0x30,
+	A5XX_RBBM_DBGBUS_A2D_RAS_1   = 0x31,
+	A5XX_RBBM_DBGBUS_A2D_RAS_2   = 0x32,
+	A5XX_RBBM_DBGBUS_A2D_RAS_3   = 0x33,
+	A5XX_RBBM_DBGBUS_VFD_0       = 0x38,
+	A5XX_RBBM_DBGBUS_VFD_1       = 0x39,
+	A5XX_RBBM_DBGBUS_VFD_2       = 0x3a,
+	A5XX_RBBM_DBGBUS_VFD_3       = 0x3b,
+	A5XX_RBBM_DBGBUS_SP_0        = 0x40,
+	A5XX_RBBM_DBGBUS_SP_1        = 0x41,
+	A5XX_RBBM_DBGBUS_SP_2        = 0x42,
+	A5XX_RBBM_DBGBUS_SP_3        = 0x43,
+	A5XX_RBBM_DBGBUS_TPL1_0      = 0x48,
+	A5XX_RBBM_DBGBUS_TPL1_1      = 0x49,
+	A5XX_RBBM_DBGBUS_TPL1_2      = 0x4a,
+	A5XX_RBBM_DBGBUS_TPL1_3      = 0x4b
+};
+
+static const struct adreno_debugbus_block a5xx_debugbus_blocks[] = {
+	{  A5XX_RBBM_DBGBUS_CP, 0x100, },
+	{  A5XX_RBBM_DBGBUS_RBBM, 0x100, },
+	{  A5XX_RBBM_DBGBUS_VBIF, 0x100, },
+	{  A5XX_RBBM_DBGBUS_HLSQ, 0x100, },
+	{  A5XX_RBBM_DBGBUS_UCHE, 0x100, },
+	{  A5XX_RBBM_DBGBUS_DPM, 0x100, },
+	{  A5XX_RBBM_DBGBUS_TESS, 0x100, },
+	{  A5XX_RBBM_DBGBUS_PC, 0x100, },
+	{  A5XX_RBBM_DBGBUS_VFDP, 0x100, },
+	{  A5XX_RBBM_DBGBUS_VPC, 0x100, },
+	{  A5XX_RBBM_DBGBUS_TSE, 0x100, },
+	{  A5XX_RBBM_DBGBUS_RAS, 0x100, },
+	{  A5XX_RBBM_DBGBUS_VSC, 0x100, },
+	{  A5XX_RBBM_DBGBUS_COM, 0x100, },
+	{  A5XX_RBBM_DBGBUS_DCOM, 0x100, },
+	{  A5XX_RBBM_DBGBUS_LRZ, 0x100, },
+	{  A5XX_RBBM_DBGBUS_A2D_DSP, 0x100, },
+	{  A5XX_RBBM_DBGBUS_CCUFCHE, 0x100, },
+	{  A5XX_RBBM_DBGBUS_GPMU, 0x100, },
+	{  A5XX_RBBM_DBGBUS_RBP, 0x100, },
+	{  A5XX_RBBM_DBGBUS_HM, 0x100, },
+	{  A5XX_RBBM_DBGBUS_RBBM_CFG, 0x100, },
+	{  A5XX_RBBM_DBGBUS_VBIF_CX, 0x100, },
+	{  A5XX_RBBM_DBGBUS_GPC, 0x100, },
+	{  A5XX_RBBM_DBGBUS_LARC, 0x100, },
+	{  A5XX_RBBM_DBGBUS_HLSQ_SPTP, 0x100, },
+	{  A5XX_RBBM_DBGBUS_RB_0, 0x100, },
+	{  A5XX_RBBM_DBGBUS_RB_1, 0x100, },
+	{  A5XX_RBBM_DBGBUS_RB_2, 0x100, },
+	{  A5XX_RBBM_DBGBUS_RB_3, 0x100, },
+	{  A5XX_RBBM_DBGBUS_CCU_0, 0x100, },
+	{  A5XX_RBBM_DBGBUS_CCU_1, 0x100, },
+	{  A5XX_RBBM_DBGBUS_CCU_2, 0x100, },
+	{  A5XX_RBBM_DBGBUS_CCU_3, 0x100, },
+	{  A5XX_RBBM_DBGBUS_A2D_RAS_0, 0x100, },
+	{  A5XX_RBBM_DBGBUS_A2D_RAS_1, 0x100, },
+	{  A5XX_RBBM_DBGBUS_A2D_RAS_2, 0x100, },
+	{  A5XX_RBBM_DBGBUS_A2D_RAS_3, 0x100, },
+	{  A5XX_RBBM_DBGBUS_VFD_0, 0x100, },
+	{  A5XX_RBBM_DBGBUS_VFD_1, 0x100, },
+	{  A5XX_RBBM_DBGBUS_VFD_2, 0x100, },
+	{  A5XX_RBBM_DBGBUS_VFD_3, 0x100, },
+	{  A5XX_RBBM_DBGBUS_SP_0, 0x100, },
+	{  A5XX_RBBM_DBGBUS_SP_1, 0x100, },
+	{  A5XX_RBBM_DBGBUS_SP_2, 0x100, },
+	{  A5XX_RBBM_DBGBUS_SP_3, 0x100, },
+	{  A5XX_RBBM_DBGBUS_TPL1_0, 0x100, },
+	{  A5XX_RBBM_DBGBUS_TPL1_1, 0x100, },
+	{  A5XX_RBBM_DBGBUS_TPL1_2, 0x100, },
+	{  A5XX_RBBM_DBGBUS_TPL1_3, 0x100, },
+};
+
+#define A5XX_NUM_AXI_ARB_BLOCKS	2
+#define A5XX_NUM_XIN_BLOCKS	4
+
+/* Width of A5XX_CP_DRAW_STATE_ADDR is 8 bits */
+#define A5XX_CP_DRAW_STATE_ADDR_WIDTH 8
+
+/* a5xx_snapshot_cp_pm4() - Dump PM4 data in snapshot */
+static size_t a5xx_snapshot_cp_pm4(struct kgsl_device *device, u8 *buf,
+		size_t remain, void *priv)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	size_t size = adreno_dev->pm4_fw_size;
+
+	if (remain < DEBUG_SECTION_SZ(size)) {
+		SNAPSHOT_ERR_NOMEM(device, "CP PM4 RAM DEBUG");
+		return 0;
+	}
+
+	header->type = SNAPSHOT_DEBUG_CP_PM4_RAM;
+	header->size = size;
+
+	memcpy(data, adreno_dev->pm4.hostptr, size * sizeof(uint32_t));
+
+	return DEBUG_SECTION_SZ(size);
+}
+
+/* a5xx_snapshot_cp_pfp() - Dump the PFP data on snapshot */
+static size_t a5xx_snapshot_cp_pfp(struct kgsl_device *device, u8 *buf,
+		size_t remain, void *priv)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	int size = adreno_dev->pfp_fw_size;
+
+	if (remain < DEBUG_SECTION_SZ(size)) {
+		SNAPSHOT_ERR_NOMEM(device, "CP PFP RAM DEBUG");
+		return 0;
+	}
+
+	header->type = SNAPSHOT_DEBUG_CP_PFP_RAM;
+	header->size = size;
+
+	memcpy(data, adreno_dev->pfp.hostptr, size * sizeof(uint32_t));
+
+	return DEBUG_SECTION_SZ(size);
+}
+
+/* a5xx_rbbm_debug_bus_read() - Read data from trace bus */
+static void a5xx_rbbm_debug_bus_read(struct kgsl_device *device,
+	unsigned int block_id, unsigned int index, unsigned int *val)
+{
+	unsigned int reg;
+
+	reg = (block_id << A5XX_RBBM_CFG_DBGBUS_SEL_PING_BLK_SEL_SHIFT) |
+			(index << A5XX_RBBM_CFG_DBGBUS_SEL_PING_INDEX_SHIFT);
+	kgsl_regwrite(device, A5XX_RBBM_CFG_DBGBUS_SEL_A, reg);
+	kgsl_regwrite(device, A5XX_RBBM_CFG_DBGBUS_SEL_B, reg);
+	kgsl_regwrite(device, A5XX_RBBM_CFG_DBGBUS_SEL_C, reg);
+	kgsl_regwrite(device, A5XX_RBBM_CFG_DBGBUS_SEL_D, reg);
+
+	kgsl_regread(device, A5XX_RBBM_CFG_DBGBUS_TRACE_BUF2, val);
+	val++;
+	kgsl_regread(device, A5XX_RBBM_CFG_DBGBUS_TRACE_BUF1, val);
+}
+
+/* a5xx_snapshot_vbif_debugbus() - Dump the VBIF debug data */
+static size_t a5xx_snapshot_vbif_debugbus(struct kgsl_device *device,
+			u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_debugbus *header =
+		(struct kgsl_snapshot_debugbus *)buf;
+	struct adreno_debugbus_block *block = priv;
+	int i, j;
+	/*
+	 * Total number of VBIF data words considering 3 sections:
+	 * 2 arbiter blocks of 16 words
+	 * 4 AXI XIN blocks of 18 dwords each
+	 * 4 core clock side XIN blocks of 12 dwords each
+	 */
+	unsigned int dwords = (16 * A5XX_NUM_AXI_ARB_BLOCKS) +
+			(18 * A5XX_NUM_XIN_BLOCKS) + (12 * A5XX_NUM_XIN_BLOCKS);
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	size_t size;
+	unsigned int reg_clk;
+
+	size = (dwords * sizeof(unsigned int)) + sizeof(*header);
+
+	if (remain < size) {
+		SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS");
+		return 0;
+	}
+	header->id = block->block_id;
+	header->count = dwords;
+
+	kgsl_regread(device, A5XX_VBIF_CLKON, &reg_clk);
+	kgsl_regwrite(device, A5XX_VBIF_CLKON, reg_clk |
+			(A5XX_VBIF_CLKON_FORCE_ON_TESTBUS_MASK <<
+			A5XX_VBIF_CLKON_FORCE_ON_TESTBUS_SHIFT));
+	kgsl_regwrite(device, A5XX_VBIF_TEST_BUS1_CTRL0, 0);
+	kgsl_regwrite(device, A5XX_VBIF_TEST_BUS_OUT_CTRL,
+			(A5XX_VBIF_TEST_BUS_OUT_CTRL_EN_MASK <<
+			A5XX_VBIF_TEST_BUS_OUT_CTRL_EN_SHIFT));
+	for (i = 0; i < A5XX_NUM_AXI_ARB_BLOCKS; i++) {
+		kgsl_regwrite(device, A5XX_VBIF_TEST_BUS2_CTRL0,
+			(1 << (i + 16)));
+		for (j = 0; j < 16; j++) {
+			kgsl_regwrite(device, A5XX_VBIF_TEST_BUS2_CTRL1,
+				((j & A5XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_MASK)
+				<< A5XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_SHIFT));
+			kgsl_regread(device, A5XX_VBIF_TEST_BUS_OUT,
+					data);
+			data++;
+		}
+	}
+
+	/* XIN blocks AXI side */
+	for (i = 0; i < A5XX_NUM_XIN_BLOCKS; i++) {
+		kgsl_regwrite(device, A5XX_VBIF_TEST_BUS2_CTRL0, 1 << i);
+		for (j = 0; j < 18; j++) {
+			kgsl_regwrite(device, A5XX_VBIF_TEST_BUS2_CTRL1,
+				((j & A5XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_MASK)
+				<< A5XX_VBIF_TEST_BUS2_CTRL1_DATA_SEL_SHIFT));
+			kgsl_regread(device, A5XX_VBIF_TEST_BUS_OUT,
+				data);
+			data++;
+		}
+	}
+
+	/* XIN blocks core clock side */
+	for (i = 0; i < A5XX_NUM_XIN_BLOCKS; i++) {
+		kgsl_regwrite(device, A5XX_VBIF_TEST_BUS1_CTRL0, 1 << i);
+		for (j = 0; j < 12; j++) {
+			kgsl_regwrite(device, A5XX_VBIF_TEST_BUS1_CTRL1,
+				((j & A5XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_MASK)
+				<< A5XX_VBIF_TEST_BUS1_CTRL1_DATA_SEL_SHIFT));
+			kgsl_regread(device, A5XX_VBIF_TEST_BUS_OUT,
+				data);
+			data++;
+		}
+	}
+	/* restore the clock of VBIF */
+	kgsl_regwrite(device, A5XX_VBIF_CLKON, reg_clk);
+	return size;
+}
+
+/* a5xx_snapshot_debugbus_block() - Capture debug data for a gpu block */
+static size_t a5xx_snapshot_debugbus_block(struct kgsl_device *device,
+	u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_debugbus *header =
+		(struct kgsl_snapshot_debugbus *)buf;
+	struct adreno_debugbus_block *block = priv;
+	int i;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	unsigned int dwords;
+	size_t size;
+
+	dwords = block->dwords;
+
+	/* For a5xx each debug bus data unit is 2 DWORDS */
+	size = (dwords * sizeof(unsigned int) * 2) + sizeof(*header);
+
+	if (remain < size) {
+		SNAPSHOT_ERR_NOMEM(device, "DEBUGBUS");
+		return 0;
+	}
+
+	header->id = block->block_id;
+	header->count = dwords * 2;
+
+	for (i = 0; i < dwords; i++)
+		a5xx_rbbm_debug_bus_read(device, block->block_id, i,
+					&data[i*2]);
+
+	return size;
+}
+
+/* a5xx_snapshot_debugbus() - Capture debug bus data */
+static void a5xx_snapshot_debugbus(struct kgsl_device *device,
+		struct kgsl_snapshot *snapshot)
+{
+	int i;
+
+	kgsl_regwrite(device, A5XX_RBBM_CFG_DBGBUS_CNTLM,
+		0xf << A5XX_RBBM_CFG_DEBBUS_CTLTM_ENABLE_SHIFT);
+
+	for (i = 0; i < ARRAY_SIZE(a5xx_debugbus_blocks); i++) {
+		if (a5xx_debugbus_blocks[i].block_id == A5XX_RBBM_DBGBUS_VBIF)
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_DEBUGBUS,
+				snapshot, a5xx_snapshot_vbif_debugbus,
+				(void *) &a5xx_debugbus_blocks[i]);
+		else
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_DEBUGBUS,
+				snapshot, a5xx_snapshot_debugbus_block,
+				(void *) &a5xx_debugbus_blocks[i]);
+	}
+}
+
+static const unsigned int a5xx_vbif_ver_20xxxxxx_registers[] = {
+	0x3000, 0x3007, 0x300C, 0x3014, 0x3018, 0x302C, 0x3030, 0x3030,
+	0x3034, 0x3036, 0x3038, 0x3038, 0x303C, 0x303D, 0x3040, 0x3040,
+	0x3042, 0x3042, 0x3049, 0x3049, 0x3058, 0x3058, 0x305A, 0x3061,
+	0x3064, 0x3068, 0x306C, 0x306D, 0x3080, 0x3088, 0x308C, 0x308C,
+	0x3090, 0x3094, 0x3098, 0x3098, 0x309C, 0x309C, 0x30C0, 0x30C0,
+	0x30C8, 0x30C8, 0x30D0, 0x30D0, 0x30D8, 0x30D8, 0x30E0, 0x30E0,
+	0x3100, 0x3100, 0x3108, 0x3108, 0x3110, 0x3110, 0x3118, 0x3118,
+	0x3120, 0x3120, 0x3124, 0x3125, 0x3129, 0x3129, 0x3131, 0x3131,
+	0x340C, 0x340C, 0x3410, 0x3410, 0x3800, 0x3801,
+};
+
+static const struct adreno_vbif_snapshot_registers
+a5xx_vbif_snapshot_registers[] = {
+	{ 0x20000000, 0xFF000000, a5xx_vbif_ver_20xxxxxx_registers,
+				ARRAY_SIZE(a5xx_vbif_ver_20xxxxxx_registers)/2},
+};
+
+/*
+ * Set of registers to dump for A5XX on snapshot.
+ * Registers in pairs - first value is the start offset, second
+ * is the stop offset (inclusive)
+ */
+
+static const unsigned int a5xx_registers[] = {
+	/* RBBM */
+	0x0000, 0x0002, 0x0004, 0x0020, 0x0022, 0x0026, 0x0029, 0x002B,
+	0x002E, 0x0035, 0x0038, 0x0042, 0x0044, 0x0044, 0x0047, 0x0095,
+	0x0097, 0x00BB, 0x03A0, 0x0464, 0x0469, 0x046F, 0x04D2, 0x04D3,
+	0x04E0, 0x04F4, 0x04F6, 0x0533, 0x0540, 0x0555, 0xF400, 0xF400,
+	0xF800, 0xF807,
+	/* CP */
+	0x0800, 0x081A, 0x081F, 0x0841, 0x0860, 0x0860, 0x0880, 0x08A0,
+	0x0B00, 0x0B12, 0x0B15, 0x0B1C, 0x0B1E, 0x0B28, 0x0B78, 0x0B7F,
+	0x0BB0, 0x0BBD,
+	/* VSC */
+	0x0BC0, 0x0BC6, 0x0BD0, 0x0C53, 0x0C60, 0x0C61,
+	/* GRAS */
+	0x0C80, 0x0C82, 0x0C84, 0x0C85, 0x0C90, 0x0C98, 0x0CA0, 0x0CA0,
+	0x0CB0, 0x0CB2, 0x2180, 0x2185, 0x2580, 0x2585,
+	/* RB */
+	0x0CC1, 0x0CC1, 0x0CC4, 0x0CC7, 0x0CCC, 0x0CCC, 0x0CD0, 0x0CD8,
+	0x0CE0, 0x0CE5, 0x0CE8, 0x0CE8, 0x0CEC, 0x0CF1, 0x0CFB, 0x0D0E,
+	0x2100, 0x211E, 0x2140, 0x2145, 0x2500, 0x251E, 0x2540, 0x2545,
+	/* PC */
+	0x0D10, 0x0D17, 0x0D20, 0x0D23, 0x0D30, 0x0D30, 0x20C0, 0x20C0,
+	0x24C0, 0x24C0,
+	/* VFD */
+	0x0E40, 0x0E43, 0x0E4A, 0x0E4A, 0x0E50, 0x0E57,
+	/* VPC */
+	0x0E60, 0x0E7C,
+	/* UCHE */
+	0x0E80, 0x0E8F, 0x0E90, 0x0E96, 0xEA0, 0xEA8, 0xEB0, 0xEB2,
+
+	/* RB CTX 0 */
+	0xE140, 0xE147, 0xE150, 0xE187, 0xE1A0, 0xE1A9, 0xE1B0, 0xE1B6,
+	0xE1C0, 0xE1C7, 0xE1D0, 0xE1D1, 0xE200, 0xE201, 0xE210, 0xE21C,
+	0xE240, 0xE268,
+	/* GRAS CTX 0 */
+	0xE000, 0xE006, 0xE010, 0xE09A, 0xE0A0, 0xE0A4, 0xE0AA, 0xE0EB,
+	0xE100, 0xE105,
+	/* PC CTX 0 */
+	0xE380, 0xE38F, 0xE3B0, 0xE3B0,
+	/* VFD CTX 0 */
+	0xE400, 0xE405, 0xE408, 0xE4E9, 0xE4F0, 0xE4F0,
+	/* VPC CTX 0 */
+	0xE280, 0xE280, 0xE282, 0xE2A3, 0xE2A5, 0xE2C2,
+
+	/* RB CTX 1 */
+	0xE940, 0xE947, 0xE950, 0xE987, 0xE9A0, 0xE9A9, 0xE9B0, 0xE9B6,
+	0xE9C0, 0xE9C7, 0xE9D0, 0xE9D1, 0xEA00, 0xEA01, 0xEA10, 0xEA1C,
+	0xEA40, 0xEA68,
+	/* GRAS CTX 1 */
+	0xE800, 0xE806, 0xE810, 0xE89A, 0xE8A0, 0xE8A4, 0xE8AA, 0xE8EB,
+	0xE900, 0xE905,
+	/* PC CTX 1 */
+	0xEB80, 0xEB8F, 0xEBB0, 0xEBB0,
+	/* VFD CTX 1 */
+	0xEC00, 0xEC05, 0xEC08, 0xECE9, 0xECF0, 0xECF0,
+	/* VPC CTX 1 */
+	0xEA80, 0xEA80, 0xEA82, 0xEAA3, 0xEAA5, 0xEAC2,
+	/* GPMU */
+	0xA800, 0xA8FF, 0xAC60, 0xAC60,
+};
+
+/*
+ * Set of registers to dump for A5XX before actually triggering crash dumper.
+ * Registers in pairs - first value is the start offset, second
+ * is the stop offset (inclusive)
+ */
+static const unsigned int a5xx_pre_crashdumper_registers[] = {
+	/* RBBM: RBBM_STATUS */
+	0x04F5, 0x04F5,
+	/* CP: CP_STATUS_1 */
+	0x0B1D, 0x0B1D,
+};
+
+struct a5xx_hlsq_sp_tp_regs {
+	unsigned int statetype;
+	unsigned int ahbaddr;
+	unsigned int size;
+	uint64_t offset;
+};
+
+static struct a5xx_hlsq_sp_tp_regs a5xx_hlsq_sp_tp_registers[] = {
+	/* HLSQ non context. 0xe32 - 0xe3f are holes so don't include them */
+	{ 0x35, 0xE00, 0x32 },
+	/* HLSQ CTX 0 2D */
+	{ 0x31, 0x2080, 0x1 },
+	/* HLSQ CTX 1 2D */
+	{ 0x33, 0x2480, 0x1 },
+	/* HLSQ CTX 0 3D. 0xe7e2 - 0xe7ff are holes so don't include them */
+	{ 0x32, 0xE780, 0x62 },
+	/* HLSQ CTX 1 3D. 0xefe2 - 0xefff are holes so don't include them */
+	{ 0x34, 0xEF80, 0x62 },
+
+	/* SP non context */
+	{ 0x3f, 0x0EC0, 0x40 },
+	/* SP CTX 0 2D */
+	{ 0x3d, 0x2040, 0x1 },
+	/* SP CTX 1 2D */
+	{ 0x3b, 0x2440, 0x1 },
+	/* SP CTX 0 3D */
+	{ 0x3e, 0xE580, 0x180 },
+	/* SP CTX 1 3D */
+	{ 0x3c, 0xED80, 0x180 },
+
+	/* TP non context. 0x0f1c - 0x0f3f are holes so don't include them */
+	{ 0x3a, 0x0F00, 0x1c },
+	/* TP CTX 0 2D. 0x200a - 0x200f are holes so don't include them */
+	{ 0x38, 0x2000, 0xa },
+	/* TP CTX 1 2D. 0x240a - 0x240f are holes so don't include them */
+	{ 0x36, 0x2400, 0xa },
+	/* TP CTX 0 3D */
+	{ 0x39, 0xE700, 0x80 },
+	/* TP CTX 1 3D */
+	{ 0x37, 0xEF00, 0x80 },
+};
+
+#define A5XX_NUM_SHADER_BANKS 4
+#define A5XX_SHADER_STATETYPE_SHIFT 8
+
+enum a5xx_shader_obj {
+	A5XX_TP_W_MEMOBJ = 1,
+	A5XX_TP_W_SAMPLER = 2,
+	A5XX_TP_W_MIPMAP_BASE = 3,
+	A5XX_TP_W_MEMOBJ_TAG = 4,
+	A5XX_TP_W_SAMPLER_TAG = 5,
+	A5XX_TP_S_3D_MEMOBJ = 6,
+	A5XX_TP_S_3D_SAMPLER = 0x7,
+	A5XX_TP_S_3D_MEMOBJ_TAG = 0x8,
+	A5XX_TP_S_3D_SAMPLER_TAG = 0x9,
+	A5XX_TP_S_CS_MEMOBJ = 0xA,
+	A5XX_TP_S_CS_SAMPLER = 0xB,
+	A5XX_TP_S_CS_MEMOBJ_TAG = 0xC,
+	A5XX_TP_S_CS_SAMPLER_TAG = 0xD,
+	A5XX_SP_W_INSTR = 0xE,
+	A5XX_SP_W_CONST = 0xF,
+	A5XX_SP_W_UAV_SIZE = 0x10,
+	A5XX_SP_W_CB_SIZE = 0x11,
+	A5XX_SP_W_UAV_BASE = 0x12,
+	A5XX_SP_W_CB_BASE = 0x13,
+	A5XX_SP_W_INST_TAG = 0x14,
+	A5XX_SP_W_STATE = 0x15,
+	A5XX_SP_S_3D_INSTR = 0x16,
+	A5XX_SP_S_3D_CONST = 0x17,
+	A5XX_SP_S_3D_CB_BASE = 0x18,
+	A5XX_SP_S_3D_CB_SIZE = 0x19,
+	A5XX_SP_S_3D_UAV_BASE = 0x1A,
+	A5XX_SP_S_3D_UAV_SIZE = 0x1B,
+	A5XX_SP_S_CS_INSTR = 0x1C,
+	A5XX_SP_S_CS_CONST = 0x1D,
+	A5XX_SP_S_CS_CB_BASE = 0x1E,
+	A5XX_SP_S_CS_CB_SIZE = 0x1F,
+	A5XX_SP_S_CS_UAV_BASE = 0x20,
+	A5XX_SP_S_CS_UAV_SIZE = 0x21,
+	A5XX_SP_S_3D_INSTR_DIRTY = 0x22,
+	A5XX_SP_S_3D_CONST_DIRTY = 0x23,
+	A5XX_SP_S_3D_CB_BASE_DIRTY = 0x24,
+	A5XX_SP_S_3D_CB_SIZE_DIRTY = 0x25,
+	A5XX_SP_S_3D_UAV_BASE_DIRTY = 0x26,
+	A5XX_SP_S_3D_UAV_SIZE_DIRTY = 0x27,
+	A5XX_SP_S_CS_INSTR_DIRTY = 0x28,
+	A5XX_SP_S_CS_CONST_DIRTY = 0x29,
+	A5XX_SP_S_CS_CB_BASE_DIRTY = 0x2A,
+	A5XX_SP_S_CS_CB_SIZE_DIRTY = 0x2B,
+	A5XX_SP_S_CS_UAV_BASE_DIRTY = 0x2C,
+	A5XX_SP_S_CS_UAV_SIZE_DIRTY = 0x2D,
+	A5XX_HLSQ_ICB = 0x2E,
+	A5XX_HLSQ_ICB_DIRTY = 0x2F,
+	A5XX_HLSQ_ICB_CB_BASE_DIRTY = 0x30,
+	A5XX_SP_POWER_RESTORE_RAM = 0x40,
+	A5XX_SP_POWER_RESTORE_RAM_TAG = 0x41,
+	A5XX_TP_POWER_RESTORE_RAM = 0x42,
+	A5XX_TP_POWER_RESTORE_RAM_TAG = 0x43,
+};
+
+struct a5xx_shader_block {
+	unsigned int statetype;
+	unsigned int sz;
+	uint64_t offset;
+};
+
+struct a5xx_shader_block_info {
+	struct a5xx_shader_block *block;
+	unsigned int bank;
+	uint64_t offset;
+};
+
+static struct a5xx_shader_block a5xx_shader_blocks[] = {
+	{A5XX_TP_W_MEMOBJ,              0x200},
+	{A5XX_TP_W_MIPMAP_BASE,         0x3C0},
+	{A5XX_TP_W_SAMPLER_TAG,          0x40},
+	{A5XX_TP_S_3D_SAMPLER,           0x80},
+	{A5XX_TP_S_3D_SAMPLER_TAG,       0x20},
+	{A5XX_TP_S_CS_SAMPLER,           0x40},
+	{A5XX_TP_S_CS_SAMPLER_TAG,       0x10},
+	{A5XX_SP_W_CONST,               0x800},
+	{A5XX_SP_W_CB_SIZE,              0x30},
+	{A5XX_SP_W_CB_BASE,              0xF0},
+	{A5XX_SP_W_STATE,                 0x1},
+	{A5XX_SP_S_3D_CONST,            0x800},
+	{A5XX_SP_S_3D_CB_SIZE,           0x28},
+	{A5XX_SP_S_3D_UAV_SIZE,          0x80},
+	{A5XX_SP_S_CS_CONST,            0x400},
+	{A5XX_SP_S_CS_CB_SIZE,            0x8},
+	{A5XX_SP_S_CS_UAV_SIZE,          0x80},
+	{A5XX_SP_S_3D_CONST_DIRTY,       0x12},
+	{A5XX_SP_S_3D_CB_SIZE_DIRTY,      0x1},
+	{A5XX_SP_S_3D_UAV_SIZE_DIRTY,     0x2},
+	{A5XX_SP_S_CS_CONST_DIRTY,        0xA},
+	{A5XX_SP_S_CS_CB_SIZE_DIRTY,      0x1},
+	{A5XX_SP_S_CS_UAV_SIZE_DIRTY,     0x2},
+	{A5XX_HLSQ_ICB_DIRTY,             0xB},
+	{A5XX_SP_POWER_RESTORE_RAM_TAG,   0xA},
+	{A5XX_TP_POWER_RESTORE_RAM_TAG,   0xA},
+	{A5XX_TP_W_SAMPLER,              0x80},
+	{A5XX_TP_W_MEMOBJ_TAG,           0x40},
+	{A5XX_TP_S_3D_MEMOBJ,           0x200},
+	{A5XX_TP_S_3D_MEMOBJ_TAG,        0x20},
+	{A5XX_TP_S_CS_MEMOBJ,           0x100},
+	{A5XX_TP_S_CS_MEMOBJ_TAG,        0x10},
+	{A5XX_SP_W_INSTR,               0x800},
+	{A5XX_SP_W_UAV_SIZE,             0x80},
+	{A5XX_SP_W_UAV_BASE,             0x80},
+	{A5XX_SP_W_INST_TAG,             0x40},
+	{A5XX_SP_S_3D_INSTR,            0x800},
+	{A5XX_SP_S_3D_CB_BASE,           0xC8},
+	{A5XX_SP_S_3D_UAV_BASE,          0x80},
+	{A5XX_SP_S_CS_INSTR,            0x400},
+	{A5XX_SP_S_CS_CB_BASE,           0x28},
+	{A5XX_SP_S_CS_UAV_BASE,          0x80},
+	{A5XX_SP_S_3D_INSTR_DIRTY,        0x1},
+	{A5XX_SP_S_3D_CB_BASE_DIRTY,      0x5},
+	{A5XX_SP_S_3D_UAV_BASE_DIRTY,     0x2},
+	{A5XX_SP_S_CS_INSTR_DIRTY,        0x1},
+	{A5XX_SP_S_CS_CB_BASE_DIRTY,      0x1},
+	{A5XX_SP_S_CS_UAV_BASE_DIRTY,     0x2},
+	{A5XX_HLSQ_ICB,                 0x200},
+	{A5XX_HLSQ_ICB_CB_BASE_DIRTY,     0x4},
+	{A5XX_SP_POWER_RESTORE_RAM,     0x140},
+	{A5XX_TP_POWER_RESTORE_RAM,      0x40},
+};
+
+static struct kgsl_memdesc capturescript;
+static struct kgsl_memdesc registers;
+static bool crash_dump_valid;
+
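+/*
+ * a5xx_snapshot_shader_memory() - Write one bank of one shader block into the
+ * snapshot, copying it from the crash dumper data buffer at the offset
+ * recorded when the capture script was built.
+ */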
+static size_t a5xx_snapshot_shader_memory(struct kgsl_device *device,
+	u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_shader *header =
+		(struct kgsl_snapshot_shader *) buf;
+	struct a5xx_shader_block_info *info =
+		(struct a5xx_shader_block_info *) priv;
+	struct a5xx_shader_block *block = info->block;
+	unsigned int *data = (unsigned int *) (buf + sizeof(*header));
+
+	if (remain < SHADER_SECTION_SZ(block->sz)) {
+		SNAPSHOT_ERR_NOMEM(device, "SHADER MEMORY");
+		return 0;
+	}
+
+	header->type = block->statetype;
+	header->index = info->bank;
+	header->size = block->sz;
+
+	memcpy(data, registers.hostptr + info->offset, block->sz);
+
+	return SHADER_SECTION_SZ(block->sz);
+}
+
+static void a5xx_snapshot_shader(struct kgsl_device *device,
+			   struct kgsl_snapshot *snapshot)
+{
+	unsigned int i, j;
+	struct a5xx_shader_block_info info;
+
+	/* Shader blocks can only be read by the crash dumper */
+	if (!crash_dump_valid)
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(a5xx_shader_blocks); i++) {
+		for (j = 0; j < A5XX_NUM_SHADER_BANKS; j++) {
+			info.block = &a5xx_shader_blocks[i];
+			info.bank = j;
+			info.offset = a5xx_shader_blocks[i].offset +
+				(j * a5xx_shader_blocks[i].sz);
+
+			/* Shader working/shadow memory */
+			kgsl_snapshot_add_section(device,
+				KGSL_SNAPSHOT_SECTION_SHADER,
+				snapshot, a5xx_snapshot_shader_memory, &info);
+		}
+	}
+}
+
+/* Dump registers which get affected by crash dumper trigger */
+static size_t a5xx_snapshot_pre_crashdump_regs(struct kgsl_device *device,
+		u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_registers pre_cdregs = {
+			.regs = a5xx_pre_crashdumper_registers,
+			.count = ARRAY_SIZE(a5xx_pre_crashdumper_registers)/2,
+	};
+
+	return kgsl_snapshot_dump_registers(device, buf, remain, &pre_cdregs);
+}
+
+static size_t a5xx_legacy_snapshot_registers(struct kgsl_device *device,
+		u8 *buf, size_t remain)
+{
+	struct kgsl_snapshot_registers regs = {
+		.regs = a5xx_registers,
+		.count = ARRAY_SIZE(a5xx_registers) / 2,
+	};
+
+	return kgsl_snapshot_dump_registers(device, buf, remain, &regs);
+}
+
+static struct cdregs {
+	const unsigned int *regs;
+	unsigned int size;
+} _a5xx_cd_registers[] = {
+	{ a5xx_registers, ARRAY_SIZE(a5xx_registers) },
+};
+
+#define REG_PAIR_COUNT(_a, _i) \
+	(((_a)[(2 * (_i)) + 1] - (_a)[2 * (_i)]) + 1)
+
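+/*
+ * Example: for the CP pair { 0x0800, 0x081A } in a5xx_registers,
+ * REG_PAIR_COUNT() yields 0x081A - 0x0800 + 1 = 27 registers, and each
+ * register contributes 8 bytes (offset plus value) to the snapshot section.
+ */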
+static size_t a5xx_snapshot_registers(struct kgsl_device *device, u8 *buf,
+		size_t remain, void *priv)
+{
+	struct kgsl_snapshot_regs *header = (struct kgsl_snapshot_regs *)buf;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	unsigned int *src = (unsigned int *) registers.hostptr;
+	unsigned int i, j, k;
+	unsigned int count = 0;
+
+	if (!crash_dump_valid)
+		return a5xx_legacy_snapshot_registers(device, buf, remain);
+
+	if (remain < sizeof(*header)) {
+		SNAPSHOT_ERR_NOMEM(device, "REGISTERS");
+		return 0;
+	}
+
+	remain -= sizeof(*header);
+
+	for (i = 0; i < ARRAY_SIZE(_a5xx_cd_registers); i++) {
+		struct cdregs *regs = &_a5xx_cd_registers[i];
+
+		for (j = 0; j < regs->size / 2; j++) {
+			unsigned int start = regs->regs[2 * j];
+			unsigned int end = regs->regs[(2 * j) + 1];
+
+			if (remain < ((end - start) + 1) * 8) {
+				SNAPSHOT_ERR_NOMEM(device, "REGISTERS");
+				goto out;
+			}
+
+			remain -= ((end - start) + 1) * 8;
+
+			for (k = start; k <= end; k++, count++) {
+				*data++ = k;
+				*data++ = *src++;
+			}
+		}
+	}
+
+out:
+	header->count = count;
+
+	/* Return the size of the section */
+	return (count * 8) + sizeof(*header);
+}
+
+/* Snapshot a preemption record buffer */
+static size_t snapshot_preemption_record(struct kgsl_device *device, u8 *buf,
+	size_t remain, void *priv)
+{
+	struct kgsl_memdesc *memdesc = priv;
+
+	struct kgsl_snapshot_gpu_object_v2 *header =
+		(struct kgsl_snapshot_gpu_object_v2 *)buf;
+
+	u8 *ptr = buf + sizeof(*header);
+
+	if (remain < (SZ_64K + sizeof(*header))) {
+		SNAPSHOT_ERR_NOMEM(device, "PREEMPTION RECORD");
+		return 0;
+	}
+
+	header->size = SZ_64K >> 2;
+	header->gpuaddr = memdesc->gpuaddr;
+	header->ptbase =
+		kgsl_mmu_pagetable_get_ttbr0(device->mmu.defaultpagetable);
+	header->type = SNAPSHOT_GPU_OBJECT_GLOBAL;
+
+	memcpy(ptr, memdesc->hostptr, SZ_64K);
+
+	return SZ_64K + sizeof(*header);
+}
+
+
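+/*
+ * _a5xx_do_crashdump() - Point the CP crash dumper at the prebuilt capture
+ * script, start it and poll CP_CRASH_DUMP_CNTL for completion. The dump is
+ * skipped if the SMMU is stalled, and crash_dump_valid is only set if the
+ * dumper finishes within CP_CRASH_DUMPER_TIMEOUT.
+ */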
+static void _a5xx_do_crashdump(struct kgsl_device *device)
+{
+	unsigned long wait_time;
+	unsigned int reg = 0;
+	unsigned int val;
+
+	crash_dump_valid = false;
+
+	if (capturescript.gpuaddr == 0 || registers.gpuaddr == 0)
+		return;
+
+	/* If the SMMU is stalled we cannot do a crash dump */
+	kgsl_regread(device, A5XX_RBBM_STATUS3, &val);
+	if (val & BIT(24))
+		return;
+
+	/* Turn on APRIV so we can access the buffers */
+	kgsl_regwrite(device, A5XX_CP_CNTL, 1);
+
+	kgsl_regwrite(device, A5XX_CP_CRASH_SCRIPT_BASE_LO,
+			lower_32_bits(capturescript.gpuaddr));
+	kgsl_regwrite(device, A5XX_CP_CRASH_SCRIPT_BASE_HI,
+			upper_32_bits(capturescript.gpuaddr));
+	kgsl_regwrite(device, A5XX_CP_CRASH_DUMP_CNTL, 1);
+
+	wait_time = jiffies + msecs_to_jiffies(CP_CRASH_DUMPER_TIMEOUT);
+	while (!time_after(jiffies, wait_time)) {
+		kgsl_regread(device, A5XX_CP_CRASH_DUMP_CNTL, &reg);
+		if (reg & 0x4)
+			break;
+		cpu_relax();
+	}
+
+	kgsl_regwrite(device, A5XX_CP_CNTL, 0);
+
+	if (!(reg & 0x4)) {
+		KGSL_CORE_ERR("Crash dump timed out: 0x%X\n", reg);
+		return;
+	}
+
+	crash_dump_valid = true;
+}
+
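+/*
+ * get_hlsq_registers() - Copy one HLSQ/SP/TP register set out of the crash
+ * dump data buffer as (address, value) pairs. Returns the number of dwords
+ * written to data.
+ */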
+static int get_hlsq_registers(struct kgsl_device *device,
+		const struct a5xx_hlsq_sp_tp_regs *regs, unsigned int *data)
+{
+	unsigned int i;
+	unsigned int *src = registers.hostptr + regs->offset;
+
+	for (i = 0; i < regs->size; i++) {
+		*data++ = regs->ahbaddr + i;
+		*data++ = *(src + i);
+	}
+
+	return (2 * regs->size);
+}
+
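+/* Dump the HLSQ, SP and TP registers captured by the crash dumper */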
+static size_t a5xx_snapshot_dump_hlsq_sp_tp_regs(struct kgsl_device *device,
+		u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_regs *header = (struct kgsl_snapshot_regs *)buf;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	int count = 0, i;
+
+	/* Figure out how many registers we are going to dump */
+	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_sp_tp_registers); i++)
+		count += a5xx_hlsq_sp_tp_registers[i].size;
+
+	if (remain < (count * 8) + sizeof(*header)) {
+		SNAPSHOT_ERR_NOMEM(device, "REGISTERS");
+		return 0;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_sp_tp_registers); i++)
+		data += get_hlsq_registers(device,
+				&a5xx_hlsq_sp_tp_registers[i], data);
+
+	header->count = count;
+
+	/* Return the size of the section */
+	return (count * 8) + sizeof(*header);
+}
+
+/*
+ * a5xx_snapshot() - A5XX GPU snapshot function
+ * @adreno_dev: Device being snapshotted
+ * @snapshot: Pointer to the snapshot instance
+ *
+ * Collect the A5XX specific registers, shader memory, debug bus data and
+ * preemption records into the snapshot memory
+ */
+void a5xx_snapshot(struct adreno_device *adreno_dev,
+		struct kgsl_snapshot *snapshot)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	struct adreno_snapshot_data *snap_data = gpudev->snapshot_data;
+	unsigned int reg, i;
+	struct adreno_ringbuffer *rb;
+
+	/* Disable Clock gating temporarily for the debug bus to work */
+	a5xx_hwcg_set(adreno_dev, false);
+
+	/* Dump the registers which get affected by crash dumper trigger */
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS,
+		snapshot, a5xx_snapshot_pre_crashdump_regs, NULL);
+
+	/* Dump vbif registers as well which get affected by crash dumper */
+	adreno_snapshot_vbif_registers(device, snapshot,
+		a5xx_vbif_snapshot_registers,
+		ARRAY_SIZE(a5xx_vbif_snapshot_registers));
+
+	/* Try to run the crash dumper */
+	if (device->snapshot_crashdumper)
+		_a5xx_do_crashdump(device);
+
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS,
+		snapshot, a5xx_snapshot_registers, NULL);
+
+	/* Dump SP TP HLSQ registers */
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS, snapshot,
+		a5xx_snapshot_dump_hlsq_sp_tp_regs, NULL);
+
+	/* CP_PFP indexed registers */
+	kgsl_snapshot_indexed_registers(device, snapshot,
+		A5XX_CP_PFP_STAT_ADDR, A5XX_CP_PFP_STAT_DATA,
+		0, snap_data->sect_sizes->cp_pfp);
+
+	/* CP_ME indexed registers */
+	kgsl_snapshot_indexed_registers(device, snapshot,
+		A5XX_CP_ME_STAT_ADDR, A5XX_CP_ME_STAT_DATA,
+		0, snap_data->sect_sizes->cp_me);
+
+	/* CP_DRAW_STATE */
+	kgsl_snapshot_indexed_registers(device, snapshot,
+		A5XX_CP_DRAW_STATE_ADDR, A5XX_CP_DRAW_STATE_DATA,
+		0, 1 << A5XX_CP_DRAW_STATE_ADDR_WIDTH);
+
+	/*
+	 * CP needs to be halted on a530v1 before reading CP_PFP_UCODE_DBG_DATA
+	 * and CP_PM4_UCODE_DBG_DATA registers
+	 */
+	if (adreno_is_a530v1(adreno_dev)) {
+		adreno_readreg(adreno_dev, ADRENO_REG_CP_ME_CNTL, &reg);
+		reg |= (1 << 27) | (1 << 28);
+		adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_CNTL, reg);
+	}
+
+	/* ME_UCODE Cache */
+	kgsl_snapshot_indexed_registers(device, snapshot,
+		A5XX_CP_ME_UCODE_DBG_ADDR, A5XX_CP_ME_UCODE_DBG_DATA,
+		0, 0x53F);
+
+	/* PFP_UCODE Cache */
+	kgsl_snapshot_indexed_registers(device, snapshot,
+		A5XX_CP_PFP_UCODE_DBG_ADDR, A5XX_CP_PFP_UCODE_DBG_DATA,
+		0, 0x53F);
+
+	/* CP MEQ */
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
+		snapshot, adreno_snapshot_cp_meq,
+		&snap_data->sect_sizes->cp_meq);
+
+	/* CP ROQ */
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
+		snapshot, adreno_snapshot_cp_roq,
+		&snap_data->sect_sizes->roq);
+
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
+		snapshot, adreno_snapshot_cp_merciu,
+		&snap_data->sect_sizes->cp_merciu);
+
+	/* CP PFP and PM4 */
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
+		snapshot, a5xx_snapshot_cp_pfp, NULL);
+
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_DEBUG,
+		snapshot, a5xx_snapshot_cp_pm4, NULL);
+
+	/* Shader memory */
+	a5xx_snapshot_shader(device, snapshot);
+
+	/* Debug bus */
+	a5xx_snapshot_debugbus(device, snapshot);
+
+	/* Preemption record */
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+		kgsl_snapshot_add_section(device,
+			KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2,
+			snapshot, snapshot_preemption_record,
+			&rb->preemption_desc);
+	}
+}
+
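+/*
+ * _a5xx_crashdump_init_shader() - Add capture script entries for each bank of
+ * a shader block: one entry programs the aperture through
+ * A5XX_HLSQ_DBG_READ_SEL and the next reads block->sz dwords from
+ * A5XX_HLSQ_DBG_AHB_READ_APERTURE into the data buffer. Returns the number of
+ * qwords written to the script.
+ */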
+static int _a5xx_crashdump_init_shader(struct a5xx_shader_block *block,
+		uint64_t *ptr, uint64_t *offset)
+{
+	int qwords = 0;
+	unsigned int j;
+
+	/* Capture each bank in the block */
+	for (j = 0; j < A5XX_NUM_SHADER_BANKS; j++) {
+		/* Program the aperture */
+		ptr[qwords++] =
+			(block->statetype << A5XX_SHADER_STATETYPE_SHIFT) | j;
+		ptr[qwords++] = (((uint64_t) A5XX_HLSQ_DBG_READ_SEL << 44)) |
+			(1 << 21) | 1;
+
+		/* Read all the data in one chunk */
+		ptr[qwords++] = registers.gpuaddr + *offset;
+		ptr[qwords++] =
+			(((uint64_t) A5XX_HLSQ_DBG_AHB_READ_APERTURE << 44)) |
+			block->sz;
+
+		/* Remember the offset of the first bank for easy access */
+		if (j == 0)
+			block->offset = *offset;
+
+		*offset += block->sz * sizeof(unsigned int);
+	}
+
+	return qwords;
+}
+
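+/*
+ * _a5xx_crashdump_init_hlsq() - Add capture script entries for one HLSQ/SP/TP
+ * register set: program the aperture, read regs->size dwords into the data
+ * buffer and remember the data offset for the snapshot code. Returns the
+ * number of qwords written to the script.
+ */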
+static int _a5xx_crashdump_init_hlsq(struct a5xx_hlsq_sp_tp_regs *regs,
+		uint64_t *ptr, uint64_t *offset)
+{
+	int qwords = 0;
+
+	/* Program the aperture */
+	ptr[qwords++] =
+		(regs->statetype << A5XX_SHADER_STATETYPE_SHIFT);
+	ptr[qwords++] = (((uint64_t) A5XX_HLSQ_DBG_READ_SEL << 44)) |
+		(1 << 21) | 1;
+
+	/* Read all the data in one chunk */
+	ptr[qwords++] = registers.gpuaddr + *offset;
+	ptr[qwords++] =
+		(((uint64_t) A5XX_HLSQ_DBG_AHB_READ_APERTURE << 44)) |
+		regs->size;
+
+	/* Remember the offset of the first bank for easy access */
+	regs->offset = *offset;
+
+	*offset += regs->size * sizeof(unsigned int);
+
+	return qwords;
+}
+
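+/*
+ * a5xx_crashdump_init() - Size and build the crash dumper capture script.
+ * Register read entries are pairs of qwords: the destination address in the
+ * data buffer followed by ((source register offset << 44) | dword count),
+ * and the script is terminated by a pair of zero qwords.
+ */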
+void a5xx_crashdump_init(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int script_size = 0;
+	unsigned int data_size = 0;
+	unsigned int i, j;
+	uint64_t *ptr;
+	uint64_t offset = 0;
+
+	if (capturescript.gpuaddr != 0 && registers.gpuaddr != 0)
+		return;
+
+	/*
+	 * We need to allocate two buffers:
+	 * 1 - the buffer to hold the capture script
+	 * 2 - the buffer to hold the data
+	 */
+
+	/*
+	 * To save the registers, we need 16 bytes per register pair for the
+	 * script and a dword for each register in the data
+	 */
+	for (i = 0; i < ARRAY_SIZE(_a5xx_cd_registers); i++) {
+		struct cdregs *regs = &_a5xx_cd_registers[i];
+
+		/* Each pair needs 16 bytes (2 qwords) */
+		script_size += (regs->size / 2) * 16;
+
+		/* Each register needs a dword in the data */
+		for (j = 0; j < regs->size / 2; j++)
+			data_size += REG_PAIR_COUNT(regs->regs, j) *
+				sizeof(unsigned int);
+
+	}
+
+	/*
+	 * To save the shader blocks for each block in each type we need 32
+	 * bytes for the script (16 bytes to program the aperture and 16 to
+	 * read the data) and then a block specific number of bytes to hold
+	 * the data
+	 */
+	for (i = 0; i < ARRAY_SIZE(a5xx_shader_blocks); i++) {
+		script_size += 32 * A5XX_NUM_SHADER_BANKS;
+		data_size += a5xx_shader_blocks[i].sz * sizeof(unsigned int) *
+			A5XX_NUM_SHADER_BANKS;
+	}
+	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_sp_tp_registers); i++) {
+		script_size += 32;
+		data_size +=
+		a5xx_hlsq_sp_tp_registers[i].size * sizeof(unsigned int);
+	}
+
+	/* Now allocate the script and data buffers */
+
+	/* The script buffer needs 2 extra qwords on the end */
+	if (kgsl_allocate_global(device, &capturescript,
+		script_size + 16, KGSL_MEMFLAGS_GPUREADONLY,
+		KGSL_MEMDESC_PRIVILEGED, "capturescript"))
+		return;
+
+	if (kgsl_allocate_global(device, &registers, data_size, 0,
+		KGSL_MEMDESC_PRIVILEGED, "capturescript_regs")) {
+		kgsl_free_global(KGSL_DEVICE(adreno_dev), &capturescript);
+		return;
+	}
+	/* Build the crash script */
+
+	ptr = (uint64_t *) capturescript.hostptr;
+
+	/* For the registers, program a read command for each pair */
+	for (i = 0; i < ARRAY_SIZE(_a5xx_cd_registers); i++) {
+		struct cdregs *regs = &_a5xx_cd_registers[i];
+
+		for (j = 0; j < regs->size / 2; j++) {
+			unsigned int r = REG_PAIR_COUNT(regs->regs, j);
+			*ptr++ = registers.gpuaddr + offset;
+			*ptr++ = (((uint64_t) regs->regs[2 * j]) << 44) | r;
+			offset += r * sizeof(unsigned int);
+		}
+	}
+
+	/* Program each shader block */
+	for (i = 0; i < ARRAY_SIZE(a5xx_shader_blocks); i++) {
+		ptr += _a5xx_crashdump_init_shader(&a5xx_shader_blocks[i], ptr,
+			&offset);
+	}
+	/* Program the hlsq sp tp register sets */
+	for (i = 0; i < ARRAY_SIZE(a5xx_hlsq_sp_tp_registers); i++)
+		ptr += _a5xx_crashdump_init_hlsq(&a5xx_hlsq_sp_tp_registers[i],
+			ptr, &offset);
+
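+	/* Terminate the script with two zero qwords */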
+	*ptr++ = 0;
+	*ptr++ = 0;
+}
diff --git a/drivers/gpu/msm/adreno_compat.c b/drivers/gpu/msm/adreno_compat.c
new file mode 100644
index 0000000..d86a0c6
--- /dev/null
+++ b/drivers/gpu/msm/adreno_compat.c
@@ -0,0 +1,231 @@
+/* Copyright (c) 2013-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/uaccess.h>
+#include <linux/ioctl.h>
+
+#include "kgsl.h"
+#include "kgsl_compat.h"
+
+#include "adreno.h"
+#include "adreno_compat.h"
+
+int adreno_getproperty_compat(struct kgsl_device *device,
+				unsigned int type,
+				void __user *value,
+				size_t sizebytes)
+{
+	int status = -EINVAL;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	switch (type) {
+	case KGSL_PROP_DEVICE_INFO:
+		{
+			struct kgsl_devinfo_compat devinfo;
+
+			if (sizebytes != sizeof(devinfo)) {
+				status = -EINVAL;
+				break;
+			}
+
+			memset(&devinfo, 0, sizeof(devinfo));
+			devinfo.device_id = device->id + 1;
+			devinfo.chip_id = adreno_dev->chipid;
+			devinfo.mmu_enabled =
+				MMU_FEATURE(&device->mmu, KGSL_MMU_PAGED);
+			devinfo.gmem_gpubaseaddr = adreno_dev->gmem_base;
+			devinfo.gmem_sizebytes = adreno_dev->gmem_size;
+
+			if (copy_to_user(value, &devinfo, sizeof(devinfo)) !=
+					0) {
+				status = -EFAULT;
+				break;
+			}
+			status = 0;
+		}
+		break;
+	case KGSL_PROP_DEVICE_SHADOW:
+		{
+			struct kgsl_shadowprop_compat shadowprop;
+
+			if (sizebytes != sizeof(shadowprop)) {
+				status = -EINVAL;
+				break;
+			}
+			memset(&shadowprop, 0, sizeof(shadowprop));
+			if (device->memstore.hostptr) {
+				/*
+				 * NOTE: with mmu enabled, gpuaddr doesn't mean
+				 * anything to mmap().
+				 * NOTE: shadowprop.gpuaddr is uint32
+				 * (because legacy) and the memstore gpuaddr is
+				 * 64 bit. Cast the memstore gpuaddr to uint32.
+				 */
+				shadowprop.gpuaddr =
+					(unsigned int) device->memstore.gpuaddr;
+				shadowprop.size =
+					(unsigned int) device->memstore.size;
+				/*
+				 * GSL needs this to be set, even if it
+				 * appears to be meaningless
+				 */
+				shadowprop.flags = KGSL_FLAGS_INITIALIZED |
+					KGSL_FLAGS_PER_CONTEXT_TIMESTAMPS;
+			}
+			if (copy_to_user(value, &shadowprop,
+				sizeof(shadowprop))) {
+				status = -EFAULT;
+				break;
+			}
+			status = 0;
+		}
+		break;
+	case KGSL_PROP_DEVICE_QDSS_STM:
+		{
+			struct kgsl_qdss_stm_prop qdssprop = {0};
+			struct kgsl_memdesc *qdss_desc =
+				kgsl_mmu_get_qdss_global_entry(device);
+
+			if (sizebytes != sizeof(qdssprop)) {
+				status = -EINVAL;
+				break;
+			}
+
+			if (qdss_desc) {
+				qdssprop.gpuaddr = qdss_desc->gpuaddr;
+				qdssprop.size = qdss_desc->size;
+			}
+
+			if (copy_to_user(value, &qdssprop,
+						sizeof(qdssprop))) {
+				status = -EFAULT;
+				break;
+			}
+			status = 0;
+		}
+		break;
+	default:
+		/*
+		 * Call adreno_getproperty to check if the property type
+		 * was KGSL_PROP_MMU_ENABLE or KGSL_PROP_INTERRUPT_WAITS
+		 */
+		status = device->ftbl->getproperty(device, type, value,
+						sizebytes);
+	}
+
+	return status;
+}
+
+int adreno_setproperty_compat(struct kgsl_device_private *dev_priv,
+				unsigned int type,
+				void __user *value,
+				unsigned int sizebytes)
+{
+	int status = -EINVAL;
+	struct kgsl_device *device = dev_priv->device;
+
+	switch (type) {
+	case KGSL_PROP_PWR_CONSTRAINT: {
+			struct kgsl_device_constraint_compat constraint32;
+			struct kgsl_device_constraint constraint;
+			struct kgsl_context *context;
+
+			if (sizebytes != sizeof(constraint32))
+				break;
+
+			if (copy_from_user(&constraint32, value,
+				sizeof(constraint32))) {
+				status = -EFAULT;
+				break;
+			}
+
+			/* Populate the real constraint type from the compat */
+			constraint.type = constraint32.type;
+			constraint.context_id = constraint32.context_id;
+			constraint.data = compat_ptr(constraint32.data);
+			constraint.size = (size_t)constraint32.size;
+
+			context = kgsl_context_get_owner(dev_priv,
+							constraint.context_id);
+			if (context == NULL)
+				break;
+			status = adreno_set_constraint(device, context,
+								&constraint);
+			kgsl_context_put(context);
+		}
+		break;
+	default:
+		/*
+		 * Call adreno_setproperty in case the property type was
+		 * KGSL_PROP_PWRCTRL
+		 */
+		status = device->ftbl->setproperty(dev_priv, type, value,
+						sizebytes);
+	}
+
+	return status;
+}
+
+static long adreno_ioctl_perfcounter_query_compat(
+		struct kgsl_device_private *dev_priv, unsigned int cmd,
+		void *data)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_priv->device);
+	struct kgsl_perfcounter_query_compat *query32 = data;
+	struct kgsl_perfcounter_query query;
+	long result;
+
+	query.groupid = query32->groupid;
+	query.countables = to_user_ptr(query32->countables);
+	query.count = query32->count;
+	query.max_counters = query32->max_counters;
+
+	result = adreno_perfcounter_query_group(adreno_dev,
+		query.groupid, query.countables,
+		query.count, &query.max_counters);
+	query32->max_counters = query.max_counters;
+
+	return result;
+}
+
+static long adreno_ioctl_perfcounter_read_compat(
+		struct kgsl_device_private *dev_priv, unsigned int cmd,
+		void *data)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_priv->device);
+	struct kgsl_perfcounter_read_compat *read32 = data;
+	struct kgsl_perfcounter_read read;
+
+	read.reads = (struct kgsl_perfcounter_read_group __user *)
+		(uintptr_t)read32->reads;
+	read.count = read32->count;
+
+	return adreno_perfcounter_read_group(adreno_dev, read.reads,
+		read.count);
+}
+
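+/*
+ * Only the perfcounter query and read ioctls need explicit 32-bit
+ * translation here; the get and put ioctls reuse the native handlers,
+ * presumably because their argument structs have the same layout for
+ * 32-bit and 64-bit userspace.
+ */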
+static struct kgsl_ioctl adreno_compat_ioctl_funcs[] = {
+	{ IOCTL_KGSL_PERFCOUNTER_GET, adreno_ioctl_perfcounter_get },
+	{ IOCTL_KGSL_PERFCOUNTER_PUT, adreno_ioctl_perfcounter_put },
+	{ IOCTL_KGSL_PERFCOUNTER_QUERY_COMPAT,
+		adreno_ioctl_perfcounter_query_compat },
+	{ IOCTL_KGSL_PERFCOUNTER_READ_COMPAT,
+		adreno_ioctl_perfcounter_read_compat },
+};
+
+long adreno_compat_ioctl(struct kgsl_device_private *dev_priv,
+			      unsigned int cmd, unsigned long arg)
+{
+	return adreno_ioctl_helper(dev_priv, cmd, arg,
+		adreno_compat_ioctl_funcs,
+		ARRAY_SIZE(adreno_compat_ioctl_funcs));
+}
diff --git a/drivers/gpu/msm/adreno_compat.h b/drivers/gpu/msm/adreno_compat.h
new file mode 100644
index 0000000..55de74c
--- /dev/null
+++ b/drivers/gpu/msm/adreno_compat.h
@@ -0,0 +1,57 @@
+/* Copyright (c) 2013-2015, 2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef __ADRENO_COMPAT_H
+#define __ADRENO_COMPAT_H
+
+#ifdef CONFIG_COMPAT
+#include <linux/compat.h>
+#include "kgsl.h"
+#include "kgsl_device.h"
+
+int adreno_getproperty_compat(struct kgsl_device *device,
+			unsigned int type,
+			void __user *value,
+			size_t sizebytes);
+
+int adreno_setproperty_compat(struct kgsl_device_private *dev_priv,
+				unsigned int type,
+				void __user *value,
+				unsigned int sizebytes);
+
+long adreno_compat_ioctl(struct kgsl_device_private *dev_priv,
+			unsigned int cmd, unsigned long arg);
+
+#else
+
+static inline int adreno_getproperty_compat(struct kgsl_device *device,
+				unsigned int type,
+				void __user *value, size_t sizebytes)
+{
+	return -EINVAL;
+}
+
+static inline int adreno_setproperty_compat(struct kgsl_device_private
+				*dev_priv, unsigned int type,
+				void __user *value, unsigned int sizebytes)
+{
+	return -EINVAL;
+}
+
+static inline long adreno_compat_ioctl(struct kgsl_device_private *dev_priv,
+				unsigned int cmd, unsigned long arg)
+{
+	return -EINVAL;
+}
+
+#endif /* CONFIG_COMPAT */
+#endif /* __ADRENO_COMPAT_H */
diff --git a/drivers/gpu/msm/adreno_coresight.c b/drivers/gpu/msm/adreno_coresight.c
new file mode 100644
index 0000000..d792d4e
--- /dev/null
+++ b/drivers/gpu/msm/adreno_coresight.c
@@ -0,0 +1,345 @@
+/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/platform_device.h>
+#include <linux/coresight.h>
+
+#include "adreno.h"
+
+#define TO_ADRENO_CORESIGHT_ATTR(_attr) \
+	container_of(_attr, struct adreno_coresight_attr, attr)
+
+ssize_t adreno_coresight_show_register(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	unsigned int val = 0;
+	struct kgsl_device *device = dev_get_drvdata(dev->parent);
+	struct adreno_device *adreno_dev;
+	struct adreno_coresight_attr *cattr = TO_ADRENO_CORESIGHT_ATTR(attr);
+
+	if (device == NULL)
+		return -EINVAL;
+
+	adreno_dev = ADRENO_DEVICE(device);
+
+	if (cattr->reg == NULL)
+		return -EINVAL;
+
+	/*
+	 * Return the current value of the register if coresight is enabled,
+	 * otherwise report 0
+	 */
+
+	mutex_lock(&device->mutex);
+	if (test_bit(ADRENO_DEVICE_CORESIGHT, &adreno_dev->priv)) {
+
+		/*
+		 * If the device isn't power collapsed read the actual value
+		 * from the hardware - otherwise return the cached value
+		 */
+
+		if (device->state == KGSL_STATE_ACTIVE ||
+			device->state == KGSL_STATE_NAP) {
+			if (!kgsl_active_count_get(device)) {
+				kgsl_regread(device, cattr->reg->offset,
+					&cattr->reg->value);
+				kgsl_active_count_put(device);
+			}
+		}
+
+		val = cattr->reg->value;
+	}
+	mutex_unlock(&device->mutex);
+
+	return snprintf(buf, PAGE_SIZE, "0x%X\n", val);
+}
+
+ssize_t adreno_coresight_store_register(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t size)
+{
+	struct kgsl_device *device = dev_get_drvdata(dev->parent);
+	struct adreno_device *adreno_dev;
+	struct adreno_coresight_attr *cattr = TO_ADRENO_CORESIGHT_ATTR(attr);
+	unsigned long val;
+	int ret;
+
+	if (device == NULL)
+		return -EINVAL;
+
+	adreno_dev = ADRENO_DEVICE(device);
+
+	if (cattr->reg == NULL)
+		return -EINVAL;
+
+	ret = kstrtoul(buf, 0, &val);
+	if (ret)
+		return ret;
+
+	mutex_lock(&device->mutex);
+
+	/* Ignore writes while coresight is off */
+	if (!test_bit(ADRENO_DEVICE_CORESIGHT, &adreno_dev->priv))
+		goto out;
+
+	cattr->reg->value = val;
+
+	/* Program the hardware if it is not power collapsed */
+	if (device->state == KGSL_STATE_ACTIVE ||
+		device->state == KGSL_STATE_NAP) {
+		if (!kgsl_active_count_get(device)) {
+			kgsl_regwrite(device, cattr->reg->offset,
+					cattr->reg->value);
+			kgsl_active_count_put(device);
+		}
+	}
+
+out:
+	mutex_unlock(&device->mutex);
+	return size;
+}
+
+/**
+ * adreno_coresight_disable() - Generic function to disable coresight debugging
+ * @csdev: Pointer to coresight's device struct
+ *
+ * This is a generic function to disable coresight debug bus on adreno
+ * devices. This should be used in all cases of disabling
+ * coresight debug bus for adreno devices. This function in turn calls
+ * the adreno device specific function through the gpudev hook.
+ * This function is registered as the coresight disable function
+ * with the coresight driver. It should only be called through the coresight
+ * driver, as that ensures the setup required on the coresight driver's side
+ * is also done.
+ */
+static void adreno_coresight_disable(struct coresight_device *csdev,
+					struct perf_event *event)
+{
+	struct kgsl_device *device = dev_get_drvdata(csdev->dev.parent);
+	struct adreno_device *adreno_dev;
+	struct adreno_gpudev *gpudev;
+	struct adreno_coresight *coresight;
+	int i;
+
+	if (device == NULL)
+		return;
+
+	adreno_dev = ADRENO_DEVICE(device);
+	gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+
+	coresight = gpudev->coresight;
+
+	if (coresight == NULL)
+		return;
+
+	mutex_lock(&device->mutex);
+
+	if (!kgsl_active_count_get(device)) {
+		for (i = 0; i < coresight->count; i++)
+			kgsl_regwrite(device, coresight->registers[i].offset,
+				0);
+
+		kgsl_active_count_put(device);
+	}
+
+	clear_bit(ADRENO_DEVICE_CORESIGHT, &adreno_dev->priv);
+
+	mutex_unlock(&device->mutex);
+}
+
+/**
+ * _adreno_coresight_get_and_clear() - Save the current value of coresight
+ * registers and clear the registers subsequently. Clearing registers
+ * has the effect of disabling coresight.
+ * @adreno_dev: Pointer to adreno device struct
+ */
+static int _adreno_coresight_get_and_clear(struct adreno_device *adreno_dev)
+{
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_coresight *coresight = gpudev->coresight;
+	int i;
+
+	if (coresight == NULL)
+		return -ENODEV;
+
+	kgsl_pre_hwaccess(device);
+	/*
+	 * Save the current value of each coresight register
+	 * and then clear each register
+	 */
+	for (i = 0; i < coresight->count; i++) {
+		kgsl_regread(device, coresight->registers[i].offset,
+			&coresight->registers[i].value);
+		kgsl_regwrite(device, coresight->registers[i].offset,
+			0);
+	}
+
+	return 0;
+}
+
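+/**
+ * _adreno_coresight_set() - Program the cached coresight register values
+ * back into the hardware and refresh the trace ATID from the
+ * "coresight-atid" property.
+ * @adreno_dev: Pointer to adreno device struct
+ */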
+static int _adreno_coresight_set(struct adreno_device *adreno_dev)
+{
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_coresight *coresight = gpudev->coresight;
+	int i;
+
+	if (coresight == NULL)
+		return -ENODEV;
+
+	for (i = 0; i < coresight->count; i++)
+		kgsl_regwrite(device, coresight->registers[i].offset,
+			coresight->registers[i].value);
+
+	kgsl_property_read_u32(device, "coresight-atid",
+		(unsigned int *)&(coresight->atid));
+
+	return 0;
+}
+/**
+ * adreno_coresight_enable() - Generic function to enable coresight debugging
+ * @csdev: Pointer to coresight's device struct
+ *
+ * This is a generic function to enable coresight debug bus on adreno
+ * devices. This should be used in all cases of enabling
+ * coresight debug bus for adreno devices. This function is registered as the
+ * coresight enable function with the coresight driver. It should only be
+ * called through the coresight driver, as that ensures the setup required on
+ * the coresight driver's side is also done.
+ */
+static int adreno_coresight_enable(struct coresight_device *csdev,
+				struct perf_event *event, u32 mode)
+{
+	struct kgsl_device *device = dev_get_drvdata(csdev->dev.parent);
+	struct adreno_device *adreno_dev;
+	struct adreno_gpudev *gpudev;
+	struct adreno_coresight *coresight;
+	int ret = 0;
+
+	if (device == NULL)
+		return -ENODEV;
+
+	adreno_dev = ADRENO_DEVICE(device);
+	gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+
+	coresight = gpudev->coresight;
+
+	if (coresight == NULL)
+		return -ENODEV;
+
+	mutex_lock(&device->mutex);
+	if (!test_and_set_bit(ADRENO_DEVICE_CORESIGHT, &adreno_dev->priv)) {
+		int i;
+
+		/* Reset all the debug registers to their default values */
+
+		for (i = 0; i < coresight->count; i++)
+			coresight->registers[i].value =
+				coresight->registers[i].initial;
+
+		if (kgsl_state_is_awake(device)) {
+			ret = kgsl_active_count_get(device);
+			if (!ret) {
+				ret = _adreno_coresight_set(adreno_dev);
+				kgsl_active_count_put(device);
+			}
+		}
+	}
+
+	mutex_unlock(&device->mutex);
+
+	return ret;
+}
+
+/**
+ * adreno_coresight_stop() - Cache coresight registers before power collapse
+ * @adreno_dev: Pointer to the adreno device structure
+ *
+ * Cache the current coresight register values so they can be restored after
+ * power collapse
+ */
+void adreno_coresight_stop(struct adreno_device *adreno_dev)
+{
+	if (test_bit(ADRENO_DEVICE_CORESIGHT, &adreno_dev->priv))
+		_adreno_coresight_get_and_clear(adreno_dev);
+}
+
+/**
+ * adreno_coresight_start() - Reprogram coresight registers after power collapse
+ * @adreno_dev: Pointer to the adreno device structure
+ *
+ * Reprogram the cached values to the coresight registers on power up
+ */
+void adreno_coresight_start(struct adreno_device *adreno_dev)
+{
+	if (test_bit(ADRENO_DEVICE_CORESIGHT, &adreno_dev->priv))
+		_adreno_coresight_set(adreno_dev);
+}
+
+static int adreno_coresight_trace_id(struct coresight_device *csdev)
+{
+	struct kgsl_device *device = dev_get_drvdata(csdev->dev.parent);
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(ADRENO_DEVICE(device));
+
+	return gpudev->coresight->atid;
+}
+
+static const struct coresight_ops_source adreno_coresight_source_ops = {
+	.trace_id = adreno_coresight_trace_id,
+	.enable = adreno_coresight_enable,
+	.disable = adreno_coresight_disable,
+};
+
+static const struct coresight_ops adreno_coresight_ops = {
+	.source_ops = &adreno_coresight_source_ops,
+};
+
+void adreno_coresight_remove(struct adreno_device *adreno_dev)
+{
+	coresight_unregister(adreno_dev->csdev);
+	adreno_dev->csdev = NULL;
+}
+
+int adreno_coresight_init(struct adreno_device *adreno_dev)
+{
+	int ret = 0;
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct coresight_desc desc;
+
+	if (gpudev->coresight == NULL)
+		return -ENODEV;
+
+	if (!IS_ERR_OR_NULL(adreno_dev->csdev))
+		return 0;
+
+	memset(&desc, 0, sizeof(desc));
+
+	desc.pdata = of_get_coresight_platform_data(&device->pdev->dev,
+			device->pdev->dev.of_node);
+	if (IS_ERR_OR_NULL(desc.pdata))
+		return (desc.pdata == NULL) ? -ENODEV :
+			PTR_ERR(desc.pdata);
+
+	desc.type = CORESIGHT_DEV_TYPE_SOURCE;
+	desc.subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_BUS;
+	desc.ops = &adreno_coresight_ops;
+	desc.dev = &device->pdev->dev;
+	desc.groups = gpudev->coresight->groups;
+
+	adreno_dev->csdev = coresight_register(&desc);
+
+	if (IS_ERR(adreno_dev->csdev))
+		ret = PTR_ERR(adreno_dev->csdev);
+
+	return ret;
+}
diff --git a/drivers/gpu/msm/adreno_cp_parser.c b/drivers/gpu/msm/adreno_cp_parser.c
new file mode 100644
index 0000000..2007c10
--- /dev/null
+++ b/drivers/gpu/msm/adreno_cp_parser.c
@@ -0,0 +1,1054 @@
+/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include "kgsl.h"
+#include "kgsl_sharedmem.h"
+#include "kgsl_snapshot.h"
+
+#include "adreno.h"
+#include "adreno_pm4types.h"
+#include "a3xx_reg.h"
+#include "adreno_cp_parser.h"
+
+#define MAX_IB_OBJS 1000
+#define NUM_SET_DRAW_GROUPS 32
+
+struct set_draw_state {
+	uint64_t cmd_stream_addr;
+	uint64_t cmd_stream_dwords;
+};
+
+/* List of variables used when parsing an IB */
+struct ib_parser_variables {
+	/* List of registers containing addresses and their sizes */
+	unsigned int cp_addr_regs[ADRENO_CP_ADDR_MAX];
+	/* 32 groups of command streams in set draw state packets */
+	struct set_draw_state set_draw_groups[NUM_SET_DRAW_GROUPS];
+};
+
+/*
+ * Used for locating shader objects. This array holds the unit size of shader
+ * objects based on type and block of shader. The type can be 0 or 1 hence there
+ * are 2 columns, and the array provides 7 rows indexed by block.
+ */
+static int load_state_unit_sizes[7][2] = {
+	{ 2, 4 },
+	{ 0, 1 },
+	{ 2, 4 },
+	{ 0, 1 },
+	{ 8, 2 },
+	{ 8, 2 },
+	{ 8, 2 },
+};
+
+static int adreno_ib_find_objs(struct kgsl_device *device,
+				struct kgsl_process_private *process,
+				uint64_t gpuaddr, uint64_t dwords,
+				int obj_type,
+				struct adreno_ib_object_list *ib_obj_list,
+				int ib_level);
+
+static int ib_parse_set_draw_state(struct kgsl_device *device,
+	unsigned int *ptr,
+	struct kgsl_process_private *process,
+	struct adreno_ib_object_list *ib_obj_list,
+	struct ib_parser_variables *ib_parse_vars);
+
+static int ib_parse_type7_set_draw_state(struct kgsl_device *device,
+	unsigned int *ptr,
+	struct kgsl_process_private *process,
+	struct adreno_ib_object_list *ib_obj_list);
+
+/*
+ * adreno_ib_merge_range() - Increases the address range tracked by an ib
+ * object
+ * @ib_obj: The ib object
+ * @gpuaddr: The start address which is to be merged
+ * @size: Size of the range being merged
+ */
+static void adreno_ib_merge_range(struct adreno_ib_object *ib_obj,
+		uint64_t gpuaddr, uint64_t size)
+{
+	uint64_t addr_end1 = ib_obj->gpuaddr + ib_obj->size;
+	uint64_t addr_end2 = gpuaddr + size;
+
+	if (gpuaddr < ib_obj->gpuaddr)
+		ib_obj->gpuaddr = gpuaddr;
+	if (addr_end2 > addr_end1)
+		ib_obj->size = addr_end2 - ib_obj->gpuaddr;
+	else
+		ib_obj->size = addr_end1 - ib_obj->gpuaddr;
+}
+
+/*
+ * adreno_ib_check_overlap() - Checks if an address range overlap
+ * @gpuaddr: The start address range to check for overlap
+ * @size: Size of the address range
+ * @type: The type of address range
+ * @ib_obj_list: The list of address ranges to check for overlap
+ *
+ * Checks if an address range overlaps with a list of address ranges
+ * Returns the entry from list which overlaps else NULL
+ */
+static struct adreno_ib_object *adreno_ib_check_overlap(uint64_t gpuaddr,
+		uint64_t size, int type,
+		struct adreno_ib_object_list *ib_obj_list)
+{
+	struct adreno_ib_object *ib_obj;
+	int i;
+
+	for (i = 0; i < ib_obj_list->num_objs; i++) {
+		ib_obj = &(ib_obj_list->obj_list[i]);
+		if ((type == ib_obj->snapshot_obj_type) &&
+			kgsl_addr_range_overlap(ib_obj->gpuaddr, ib_obj->size,
+			gpuaddr, size))
+			/* regions overlap */
+			return ib_obj;
+	}
+	return NULL;
+}
+
+/*
+ * adreno_ib_add() - Add a gpuaddress range to list
+ * @process: Process in which the gpuaddress is mapped
+ * @gpuaddr: Start of the gpu address range to be added
+ * @type: The type of address range
+ * @ib_obj_list: List of the address ranges in which the given range is to be
+ * added
+ *
+ * Add a gpuaddress range as an ib object to a given list after checking if it
+ * overlaps with another entry on the list. If it conflicts then change the
+ * existing entry to incorporate this range
+ *
+ * Returns 0 on success else error code
+ */
+static int adreno_ib_add(struct kgsl_process_private *process,
+				uint64_t gpuaddr, int type,
+				struct adreno_ib_object_list *ib_obj_list)
+{
+	uint64_t size;
+	struct adreno_ib_object *ib_obj;
+	struct kgsl_mem_entry *entry;
+
+	if (ib_obj_list->num_objs >= MAX_IB_OBJS)
+		return -E2BIG;
+
+	entry = kgsl_sharedmem_find(process, gpuaddr);
+	if (!entry)
+		/*
+		 * Do not fail if gpuaddr not found, we can continue
+		 * to search for other objects even if a few objects are
+		 * not found
+		 */
+		return 0;
+
+	size = entry->memdesc.size;
+	gpuaddr = entry->memdesc.gpuaddr;
+
+	ib_obj = adreno_ib_check_overlap(gpuaddr, size, type, ib_obj_list);
+	if (ib_obj) {
+		adreno_ib_merge_range(ib_obj, gpuaddr, size);
+		kgsl_mem_entry_put(entry);
+	} else {
+		adreno_ib_init_ib_obj(gpuaddr, size, type, entry,
+			&(ib_obj_list->obj_list[ib_obj_list->num_objs]));
+		ib_obj_list->num_objs++;
+	}
+	return 0;
+}
+
+/*
+ * ib_save_mip_addresses() - Find mip addresses
+ * @pkt: Pointer to the packet in IB
+ * @process: The process in which IB is mapped
+ * @ib_obj_list: List in which any objects found are added
+ *
+ * Returns 0 on success else error code
+ */
+static int ib_save_mip_addresses(unsigned int *pkt,
+		struct kgsl_process_private *process,
+		struct adreno_ib_object_list *ib_obj_list)
+{
+	int ret = 0;
+	int num_levels = (pkt[1] >> 22) & 0x03FF;
+	int i;
+	unsigned int *hostptr;
+	struct kgsl_mem_entry *ent;
+	unsigned int block, type;
+	int unitsize = 0;
+
+	block = (pkt[1] >> 19) & 0x07;
+	type = pkt[2] & 0x03;
+
+	if (type == 0)
+		unitsize = load_state_unit_sizes[block][0];
+	else
+		unitsize = load_state_unit_sizes[block][1];
+
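+	/*
+	 * Block 3 with type 1 appears to describe a mipmap pointer table:
+	 * each of the num_levels dwords pointed to by the packet is itself a
+	 * GPU address that needs to be captured.
+	 */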
+	if (3 == block && 1 == type) {
+		uint64_t gpuaddr = pkt[2] & 0xFFFFFFFC;
+		uint64_t size = (num_levels * unitsize) << 2;
+
+		ent = kgsl_sharedmem_find(process, gpuaddr);
+		if (ent == NULL)
+			return 0;
+
+		if (!kgsl_gpuaddr_in_memdesc(&ent->memdesc,
+			gpuaddr, size)) {
+			kgsl_mem_entry_put(ent);
+			return 0;
+		}
+
+		hostptr = kgsl_gpuaddr_to_vaddr(&ent->memdesc, gpuaddr);
+		if (hostptr != NULL) {
+			for (i = 0; i < num_levels; i++) {
+				ret = adreno_ib_add(process, hostptr[i],
+					SNAPSHOT_GPU_OBJECT_GENERIC,
+					ib_obj_list);
+				if (ret)
+					break;
+			}
+		}
+
+		kgsl_memdesc_unmap(&ent->memdesc);
+		kgsl_mem_entry_put(ent);
+	}
+	return ret;
+}
+
+/*
+ * ib_parse_load_state() - Parse load state packet
+ * @pkt: Pointer to the packet in IB
+ * @process: The process in which the IB is mapped
+ * @ib_obj_list: List in which any objects found are added
+ * @ib_parse_vars: Variable list that stores temporary addresses
+ *
+ * Parse load state packet found in an IB and add any memory object found to
+ * a list
+ * Returns 0 on success else error code
+ */
+static int ib_parse_load_state(unsigned int *pkt,
+	struct kgsl_process_private *process,
+	struct adreno_ib_object_list *ib_obj_list,
+	struct ib_parser_variables *ib_parse_vars)
+{
+	int ret = 0;
+	int i;
+
+	/*
+	 * The objective here is to find indirect shaders, i.e. shaders loaded from
+	 * GPU memory instead of directly in the command.  These should be added
+	 * to the list of memory objects to dump. So look at the load state
+	 * if the block is indirect (source = 4). If so then add the memory
+	 * address to the list.  The size of the object differs depending on the
+	 * type per the load_state_unit_sizes array above.
+	 */
+
+	if (type3_pkt_size(pkt[0]) < 2)
+		return 0;
+
+	/*
+	 * Anything from 3rd ordinal onwards of packet can be a memory object,
+	 * no need to be fancy about parsing it, just save it if it looks
+	 * like memory
+	 */
+	for (i = 0; i <= (type3_pkt_size(pkt[0]) - 2); i++) {
+		ret |= adreno_ib_add(process, pkt[2 + i] & 0xFFFFFFFC,
+				SNAPSHOT_GPU_OBJECT_GENERIC,
+				ib_obj_list);
+		if (ret)
+			break;
+	}
+	/* get the mip addresses */
+	if (!ret)
+		ret = ib_save_mip_addresses(pkt, process, ib_obj_list);
+	return ret;
+}
+
+/*
+ * This opcode sets the base addresses for the visibility stream buffer and the
+ * visibility stream size buffer.
+ */
+
+static int ib_parse_set_bin_data(unsigned int *pkt,
+	struct kgsl_process_private *process,
+	struct adreno_ib_object_list *ib_obj_list,
+	struct ib_parser_variables *ib_parse_vars)
+{
+	int ret = 0;
+
+	if (type3_pkt_size(pkt[0]) < 2)
+		return 0;
+
+	/* Visibility stream buffer */
+	ret = adreno_ib_add(process, pkt[1],
+		SNAPSHOT_GPU_OBJECT_GENERIC, ib_obj_list);
+	if (ret)
+		return ret;
+
+	/* Visibility stream size buffer (fixed size 8 dwords) */
+	ret = adreno_ib_add(process, pkt[2],
+		SNAPSHOT_GPU_OBJECT_GENERIC, ib_obj_list);
+
+	return ret;
+}
+
+/*
+ * This opcode writes to GPU memory - if the buffer is written to, there is a
+ * good chance that it would be valuable to capture in the snapshot, so mark all
+ * buffers that are written to as frozen
+ */
+
+static int ib_parse_mem_write(unsigned int *pkt,
+	struct kgsl_process_private *process,
+	struct adreno_ib_object_list *ib_obj_list,
+	struct ib_parser_variables *ib_parse_vars)
+{
+	if (type3_pkt_size(pkt[0]) < 1)
+		return 0;
+
+	/*
+	 * The address is where the data in the rest of this packet is written
+	 * to, but since that might be an offset into the larger buffer we need
+	 * to get the whole thing. Pass a size of 0 to capture the entire buffer.
+	 */
+
+	return adreno_ib_add(process, pkt[1] & 0xFFFFFFFC,
+		SNAPSHOT_GPU_OBJECT_GENERIC, ib_obj_list);
+}
+
+/*
+ * ib_add_type0_entries() - Add memory objects to list
+ * @device: The device on which the IB will execute
+ * @process: The process in which IB is mapped
+ * @ib_obj_list: The list of gpu objects
+ * @ib_parse_vars: Address ranges found in type0 packets
+ *
+ * Add memory objects to the given list that are found in type0 packets
+ * Returns 0 on success else error code
+ */
+static int ib_add_type0_entries(struct kgsl_device *device,
+	struct kgsl_process_private *process,
+	struct adreno_ib_object_list *ib_obj_list,
+	struct ib_parser_variables *ib_parse_vars)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	int ret = 0;
+	int i;
+	int vfd_end;
+	unsigned int mask;
+	/* First up, the visibility stream buffer */
+	if (adreno_is_a4xx(adreno_dev))
+		mask = 0xFFFFFFFC;
+	else
+		mask = 0xFFFFFFFF;
+	for (i = ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_0;
+		i < ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_7; i++) {
+		if (ib_parse_vars->cp_addr_regs[i]) {
+			ret = adreno_ib_add(process,
+				ib_parse_vars->cp_addr_regs[i] & mask,
+				SNAPSHOT_GPU_OBJECT_GENERIC,
+				ib_obj_list);
+			if (ret)
+				return ret;
+			ib_parse_vars->cp_addr_regs[i] = 0;
+			ib_parse_vars->cp_addr_regs[i + 1] = 0;
+			i++;
+		}
+	}
+
+	vfd_end = adreno_is_a4xx(adreno_dev) ?
+		ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_31 :
+		ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_15;
+	for (i = ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_0;
+		i <= vfd_end; i++) {
+		if (ib_parse_vars->cp_addr_regs[i]) {
+			ret = adreno_ib_add(process,
+				ib_parse_vars->cp_addr_regs[i],
+				SNAPSHOT_GPU_OBJECT_GENERIC,
+				ib_obj_list);
+			if (ret)
+				return ret;
+			ib_parse_vars->cp_addr_regs[i] = 0;
+		}
+	}
+
+	if (ib_parse_vars->cp_addr_regs[ADRENO_CP_ADDR_VSC_SIZE_ADDRESS]) {
+		ret = adreno_ib_add(process,
+			ib_parse_vars->cp_addr_regs[
+				ADRENO_CP_ADDR_VSC_SIZE_ADDRESS] & mask,
+			SNAPSHOT_GPU_OBJECT_GENERIC, ib_obj_list);
+		if (ret)
+			return ret;
+		ib_parse_vars->cp_addr_regs[
+			ADRENO_CP_ADDR_VSC_SIZE_ADDRESS] = 0;
+	}
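+	/*
+	 * The SP_VS/SP_FS private memory and object start registers appear to
+	 * hold 32-byte aligned addresses, so clear the low five bits before
+	 * adding them to the object list.
+	 */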
+	mask = 0xFFFFFFE0;
+	for (i = ADRENO_CP_ADDR_SP_VS_PVT_MEM_ADDR;
+		i <= ADRENO_CP_ADDR_SP_FS_OBJ_START_REG; i++) {
+		ret = adreno_ib_add(process,
+			ib_parse_vars->cp_addr_regs[i] & mask,
+			SNAPSHOT_GPU_OBJECT_GENERIC, ib_obj_list);
+		if (ret)
+			return ret;
+		ib_parse_vars->cp_addr_regs[i] = 0;
+	}
+	return ret;
+}
+/*
+ * The DRAW_INDX opcode sends a draw initiator which starts a draw operation in
+ * the GPU, so this is the point where all the registers and buffers become
+ * "valid".  The DRAW_INDX may also have an index buffer pointer that should be
+ * frozen with the others
+ */
+
+static int ib_parse_draw_indx(struct kgsl_device *device, unsigned int *pkt,
+	struct kgsl_process_private *process,
+	struct adreno_ib_object_list *ib_obj_list,
+	struct ib_parser_variables *ib_parse_vars)
+{
+	int ret = 0;
+	int i;
+	int opcode = cp_type3_opcode(pkt[0]);
+
+	switch (opcode) {
+	case CP_DRAW_INDX:
+		if (type3_pkt_size(pkt[0]) > 3) {
+			ret = adreno_ib_add(process,
+				pkt[4], SNAPSHOT_GPU_OBJECT_GENERIC,
+				ib_obj_list);
+		}
+		break;
+	case CP_DRAW_INDX_OFFSET:
+		if (type3_pkt_size(pkt[0]) == 6) {
+			ret = adreno_ib_add(process,
+				pkt[5], SNAPSHOT_GPU_OBJECT_GENERIC,
+				ib_obj_list);
+		}
+		break;
+	case CP_DRAW_INDIRECT:
+		if (type3_pkt_size(pkt[0]) == 2) {
+			ret = adreno_ib_add(process,
+				pkt[2], SNAPSHOT_GPU_OBJECT_GENERIC,
+				ib_obj_list);
+		}
+		break;
+	case CP_DRAW_INDX_INDIRECT:
+		if (type3_pkt_size(pkt[0]) == 4) {
+			ret = adreno_ib_add(process,
+				pkt[2], SNAPSHOT_GPU_OBJECT_GENERIC,
+				ib_obj_list);
+			if (ret)
+				break;
+			ret = adreno_ib_add(process,
+				pkt[4], SNAPSHOT_GPU_OBJECT_GENERIC,
+				ib_obj_list);
+		}
+		break;
+	case CP_DRAW_AUTO:
+		if (type3_pkt_size(pkt[0]) == 6) {
+			ret = adreno_ib_add(process,
+				 pkt[3], SNAPSHOT_GPU_OBJECT_GENERIC,
+				ib_obj_list);
+			if (ret)
+				break;
+			ret = adreno_ib_add(process,
+				pkt[4], SNAPSHOT_GPU_OBJECT_GENERIC,
+				ib_obj_list);
+		}
+		break;
+	}
+
+	if (ret)
+		return ret;
+	/*
+	 * All of the type0 writes are valid at a draw initiator, so freeze
+	 * the various buffers that we are tracking
+	 */
+	ret = ib_add_type0_entries(device, process, ib_obj_list,
+				ib_parse_vars);
+	if (ret)
+		return ret;
+	/* Process set draw state command streams if any */
+	for (i = 0; i < NUM_SET_DRAW_GROUPS; i++) {
+		if (!ib_parse_vars->set_draw_groups[i].cmd_stream_dwords)
+			continue;
+		ret = adreno_ib_find_objs(device, process,
+			ib_parse_vars->set_draw_groups[i].cmd_stream_addr,
+			ib_parse_vars->set_draw_groups[i].cmd_stream_dwords,
+			SNAPSHOT_GPU_OBJECT_DRAW,
+			ib_obj_list, 2);
+		if (ret)
+			break;
+	}
+	return ret;
+}
+
+/*
+ * Parse all the type7 opcode packets that may contain important information,
+ * such as additional GPU buffers to grab or a draw initiator
+ */
+
+static int ib_parse_type7(struct kgsl_device *device, unsigned int *ptr,
+	struct kgsl_process_private *process,
+	struct adreno_ib_object_list *ib_obj_list,
+	struct ib_parser_variables *ib_parse_vars)
+{
+	int opcode = cp_type7_opcode(*ptr);
+
+	switch (opcode) {
+	case CP_SET_DRAW_STATE:
+		return ib_parse_type7_set_draw_state(device, ptr, process,
+					ib_obj_list);
+	}
+
+	return 0;
+}
+
+/*
+ * Parse all the type3 opcode packets that may contain important information,
+ * such as additional GPU buffers to grab or a draw initiator
+ */
+
+static int ib_parse_type3(struct kgsl_device *device, unsigned int *ptr,
+	struct kgsl_process_private *process,
+	struct adreno_ib_object_list *ib_obj_list,
+	struct ib_parser_variables *ib_parse_vars)
+{
+	int opcode = cp_type3_opcode(*ptr);
+
+	switch (opcode) {
+	case  CP_LOAD_STATE:
+		return ib_parse_load_state(ptr, process, ib_obj_list,
+					ib_parse_vars);
+	case CP_SET_BIN_DATA:
+		return ib_parse_set_bin_data(ptr, process, ib_obj_list,
+					ib_parse_vars);
+	case CP_MEM_WRITE:
+		return ib_parse_mem_write(ptr, process, ib_obj_list,
+					ib_parse_vars);
+	case CP_DRAW_INDX:
+	case CP_DRAW_INDX_OFFSET:
+	case CP_DRAW_INDIRECT:
+	case CP_DRAW_INDX_INDIRECT:
+		return ib_parse_draw_indx(device, ptr, process, ib_obj_list,
+					ib_parse_vars);
+	case CP_SET_DRAW_STATE:
+		return ib_parse_set_draw_state(device, ptr, process,
+					ib_obj_list, ib_parse_vars);
+	}
+
+	return 0;
+}
+
+/*
+ * Parse type0 packets found in the stream.  Some of the registers that are
+ * written are clues for GPU buffers that we need to freeze.  Register writes
+ * are considered valid when a draw initiator is issued, so just cache the values
+ * here and freeze them when a CP_DRAW_INDX is seen.  This protects against
+ * needlessly caching buffers that won't be used during a draw call
+ */
+
+static int ib_parse_type0(struct kgsl_device *device, unsigned int *ptr,
+	struct kgsl_process_private *process,
+	struct adreno_ib_object_list *ib_obj_list,
+	struct ib_parser_variables *ib_parse_vars)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	int size = type0_pkt_size(*ptr);
+	int offset = type0_pkt_offset(*ptr);
+	int i;
+	int reg_index;
+	int ret = 0;
+
+	for (i = 0; i < size; i++, offset++) {
+		/* Visibility stream buffer */
+		if (offset >= adreno_cp_parser_getreg(adreno_dev,
+				ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_0) &&
+			offset <= adreno_cp_parser_getreg(adreno_dev,
+				ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_7)) {
+			reg_index = adreno_cp_parser_regindex(
+					adreno_dev, offset,
+					ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_0,
+					ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_7);
+			if (reg_index >= 0)
+				ib_parse_vars->cp_addr_regs[reg_index] =
+								ptr[i + 1];
+			continue;
+		} else if ((offset >= adreno_cp_parser_getreg(adreno_dev,
+					ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_0)) &&
+			(offset <= adreno_cp_parser_getreg(adreno_dev,
+				ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_15))) {
+			reg_index = adreno_cp_parser_regindex(adreno_dev,
+					offset,
+					ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_0,
+					ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_15);
+			if (reg_index >= 0)
+				ib_parse_vars->cp_addr_regs[reg_index] =
+								ptr[i + 1];
+			continue;
+		} else if ((offset >= adreno_cp_parser_getreg(adreno_dev,
+					ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_16)) &&
+			(offset <= adreno_cp_parser_getreg(adreno_dev,
+				ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_31))) {
+			reg_index = adreno_cp_parser_regindex(adreno_dev,
+					offset,
+					ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_16,
+					ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_31);
+			if (reg_index >= 0)
+				ib_parse_vars->cp_addr_regs[reg_index] =
+								ptr[i + 1];
+			continue;
+		} else {
+			if (offset ==
+				adreno_cp_parser_getreg(adreno_dev,
+					ADRENO_CP_ADDR_VSC_SIZE_ADDRESS))
+				ib_parse_vars->cp_addr_regs[
+					ADRENO_CP_ADDR_VSC_SIZE_ADDRESS] =
+						ptr[i + 1];
+			else if (offset == adreno_cp_parser_getreg(adreno_dev,
+					ADRENO_CP_ADDR_SP_VS_PVT_MEM_ADDR))
+				ib_parse_vars->cp_addr_regs[
+					ADRENO_CP_ADDR_SP_VS_PVT_MEM_ADDR] =
+						ptr[i + 1];
+			else if (offset == adreno_cp_parser_getreg(adreno_dev,
+					ADRENO_CP_ADDR_SP_FS_PVT_MEM_ADDR))
+				ib_parse_vars->cp_addr_regs[
+					ADRENO_CP_ADDR_SP_FS_PVT_MEM_ADDR] =
+						ptr[i + 1];
+			else if (offset == adreno_cp_parser_getreg(adreno_dev,
+					ADRENO_CP_ADDR_SP_VS_OBJ_START_REG))
+				ib_parse_vars->cp_addr_regs[
+					ADRENO_CP_ADDR_SP_VS_OBJ_START_REG] =
+						ptr[i + 1];
+			else if (offset == adreno_cp_parser_getreg(adreno_dev,
+					ADRENO_CP_ADDR_SP_FS_OBJ_START_REG))
+				ib_parse_vars->cp_addr_regs[
+					ADRENO_CP_ADDR_SP_FS_OBJ_START_REG] =
+						ptr[i + 1];
+			else if ((offset == adreno_cp_parser_getreg(adreno_dev,
+					ADRENO_CP_UCHE_INVALIDATE0)) ||
+				(offset == adreno_cp_parser_getreg(adreno_dev,
+					ADRENO_CP_UCHE_INVALIDATE1))) {
+				ret = adreno_ib_add(process,
+					ptr[i + 1] & 0xFFFFFFC0,
+					SNAPSHOT_GPU_OBJECT_GENERIC,
+					ib_obj_list);
+				if (ret)
+					break;
+			}
+		}
+	}
+	return ret;
+}
+
+static int ib_parse_type7_set_draw_state(struct kgsl_device *device,
+	unsigned int *ptr,
+	struct kgsl_process_private *process,
+	struct adreno_ib_object_list *ib_obj_list)
+{
+	int size = type7_pkt_size(*ptr);
+	int i;
+	int grp_id;
+	int ret = 0;
+	int flags;
+	uint64_t cmd_stream_dwords;
+	uint64_t cmd_stream_addr;
+
+	/*
+	 * size is the size of the packet that does not include the DWORD
+	 * for the packet header, we only want to loop here through the
+	 * packet parameters from ptr[1] till ptr[size] where ptr[0] is the
+	 * packet header. In each loop we look at 3 DWORDS hence increment
+	 * loop counter by 3 always
+	 */
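+	/*
+	 * Each group descriptor is three dwords: dword 0 packs the command
+	 * stream dword count (bits 15:0), the flags (bits 19:16) and the
+	 * group id (bits 28:24); dwords 1 and 2 hold the low and high halves
+	 * of the 64-bit command stream address.
+	 */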
+	for (i = 1; i <= size; i += 3) {
+		grp_id = (ptr[i] & 0x1F000000) >> 24;
+		/* take action based on flags */
+		flags = (ptr[i] & 0x000F0000) >> 16;
+
+		/*
+		 * dirty flag or no flags both mean we need to load it for
+		 * next draw. No flags is used when the group is activated
+		 * or initialized for the first time in the IB
+		 */
+		if (flags & 0x1 || !flags) {
+			cmd_stream_dwords = ptr[i] & 0x0000FFFF;
+			cmd_stream_addr = ptr[i + 2];
+			cmd_stream_addr = cmd_stream_addr << 32 | ptr[i + 1];
+			if (cmd_stream_dwords)
+				ret = adreno_ib_find_objs(device, process,
+					cmd_stream_addr, cmd_stream_dwords,
+					SNAPSHOT_GPU_OBJECT_DRAW, ib_obj_list,
+					2);
+			if (ret)
+				break;
+			continue;
+		}
+		/* load immediate */
+		if (flags & 0x8) {
+			uint64_t gpuaddr = ptr[i + 2];
+
+			gpuaddr = gpuaddr << 32 | ptr[i + 1];
+			ret = adreno_ib_find_objs(device, process,
+				gpuaddr, (ptr[i] & 0x0000FFFF),
+				SNAPSHOT_GPU_OBJECT_IB,
+				ib_obj_list, 2);
+			if (ret)
+				break;
+		}
+	}
+	return ret;
+}
+
+static int ib_parse_set_draw_state(struct kgsl_device *device,
+	unsigned int *ptr,
+	struct kgsl_process_private *process,
+	struct adreno_ib_object_list *ib_obj_list,
+	struct ib_parser_variables *ib_parse_vars)
+{
+	int size = type0_pkt_size(*ptr);
+	int i;
+	int grp_id;
+	int ret = 0;
+	int flags;
+
+	/*
+	 * size is the size of the packet that does not include the DWORD
+	 * for the packet header, we only want to loop here through the
+	 * packet parameters from ptr[1] till ptr[size] where ptr[0] is the
+	 * packet header. In each loop we look at 2 DWORDS hence increment
+	 * loop counter by 2 always
+	 */
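+	/*
+	 * Each group descriptor is two dwords: dword 0 packs the command
+	 * stream dword count (bits 15:0), the flags (bits 19:16) and the
+	 * group id (bits 28:24); dword 1 is the 32-bit command stream address.
+	 */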
+	for (i = 1; i <= size; i += 2) {
+		grp_id = (ptr[i] & 0x1F000000) >> 24;
+		/* take action based on flags */
+		flags = (ptr[i] & 0x000F0000) >> 16;
+		/* Disable all groups */
+		if (flags & 0x4) {
+			int j;
+
+			for (j = 0; j < NUM_SET_DRAW_GROUPS; j++)
+				ib_parse_vars->set_draw_groups[j].
+					cmd_stream_dwords = 0;
+			continue;
+		}
+		/* disable flag */
+		if (flags & 0x2) {
+			ib_parse_vars->set_draw_groups[grp_id].
+						cmd_stream_dwords = 0;
+			continue;
+		}
+		/*
+		 * dirty flag or no flags both mean we need to load it for
+		 * next draw. No flags is used when the group is activated
+		 * or initialized for the first time in the IB
+		 */
+		if (flags & 0x1 || !flags) {
+			ib_parse_vars->set_draw_groups[grp_id].
+				cmd_stream_dwords = ptr[i] & 0x0000FFFF;
+			ib_parse_vars->set_draw_groups[grp_id].
+				cmd_stream_addr = ptr[i + 1];
+			continue;
+		}
+		/* load immediate */
+		if (flags & 0x8) {
+			ret = adreno_ib_find_objs(device, process,
+				ptr[i + 1], (ptr[i] & 0x0000FFFF),
+				SNAPSHOT_GPU_OBJECT_IB,
+				ib_obj_list, 2);
+			if (ret)
+				break;
+		}
+	}
+	return ret;
+}
+
+/*
+ * adreno_cp_parse_ib2() - Wrapper function around IB2 parsing
+ * @device: Device pointer
+ * @process: Process in which the IB is allocated
+ * @gpuaddr: IB2 gpuaddr
+ * @dwords: IB2 size in dwords
+ * @ib_obj_list: List of objects found in IB
+ * @ib_level: The level from which function is called, either from IB1 or IB2
+ *
+ * Function does some checks to ensure that IB2 parsing is called from IB1
+ * and then calls the function to find objects in IB2.
+ */
+static int adreno_cp_parse_ib2(struct kgsl_device *device,
+			struct kgsl_process_private *process,
+			uint64_t gpuaddr, uint64_t dwords,
+			struct adreno_ib_object_list *ib_obj_list,
+			int ib_level)
+{
+	int i;
+
+	/*
+	 * We can only expect an IB2 in IB1, if we are
+	 * already processing an IB2 then return error
+	 */
+	if (ib_level == 2)
+		return -EINVAL;
+	/*
+	 * only try to find sub objects iff this IB has
+	 * not been processed already
+	 */
+	for (i = 0; i < ib_obj_list->num_objs; i++) {
+		struct adreno_ib_object *ib_obj = &(ib_obj_list->obj_list[i]);
+
+		if ((ib_obj->snapshot_obj_type == SNAPSHOT_GPU_OBJECT_IB) &&
+			(gpuaddr >= ib_obj->gpuaddr) &&
+			(gpuaddr + dwords * sizeof(unsigned int) <=
+			ib_obj->gpuaddr + ib_obj->size))
+			return 0;
+	}
+
+	return adreno_ib_find_objs(device, process, gpuaddr, dwords,
+		SNAPSHOT_GPU_OBJECT_IB, ib_obj_list, 2);
+}
+
+/*
+ * adreno_ib_find_objs() - Find all IB objects in a given IB
+ * @device: The device pointer on which the IB executes
+ * @process: The process in which the IB and all contained objects are mapped.
+ * @gpuaddr: The gpu address of the IB
+ * @dwords: Size of ib in dwords
+ * @obj_type: The object type can be either an IB or a draw state sequence
+ * @ib_obj_list: The list in which the IB and the objects in it are added.
+ * @ib_level: Indicates if IB1 or IB2 is being processed
+ *
+ * Finds all IB objects in a given IB and puts them in a list. Can be called
+ * recursively for the IB2's in the IB1's
+ * Returns 0 on success else error code
+ */
+static int adreno_ib_find_objs(struct kgsl_device *device,
+				struct kgsl_process_private *process,
+				uint64_t gpuaddr, uint64_t dwords,
+				int obj_type,
+				struct adreno_ib_object_list *ib_obj_list,
+				int ib_level)
+{
+	int ret = 0;
+	uint64_t rem = dwords;
+	int i;
+	struct ib_parser_variables ib_parse_vars;
+	unsigned int *src;
+	struct adreno_ib_object *ib_obj;
+	struct kgsl_mem_entry *entry;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	/* check that this IB is not already on list */
+	for (i = 0; i < ib_obj_list->num_objs; i++) {
+		ib_obj = &(ib_obj_list->obj_list[i]);
+		if ((obj_type == ib_obj->snapshot_obj_type) &&
+			(ib_obj->gpuaddr <= gpuaddr) &&
+			((ib_obj->gpuaddr + ib_obj->size) >=
+			(gpuaddr + (dwords << 2))))
+			return 0;
+	}
+
+	entry = kgsl_sharedmem_find(process, gpuaddr);
+	if (!entry)
+		return -EINVAL;
+
+	if (!kgsl_gpuaddr_in_memdesc(&entry->memdesc, gpuaddr, (dwords << 2))) {
+		kgsl_mem_entry_put(entry);
+		return -EINVAL;
+	}
+
+	src = kgsl_gpuaddr_to_vaddr(&entry->memdesc, gpuaddr);
+	if (!src) {
+		kgsl_mem_entry_put(entry);
+		return -EINVAL;
+	}
+
+	memset(&ib_parse_vars, 0, sizeof(struct ib_parser_variables));
+
+	ret = adreno_ib_add(process, gpuaddr, obj_type, ib_obj_list);
+	if (ret)
+		goto done;
+
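+	/*
+	 * Walk the IB one packet at a time: the loop header consumes the
+	 * packet header dword while the "i += pktsize; rem -= pktsize" at
+	 * the bottom of the loop skips over the payload.
+	 */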
+	for (i = 0; rem > 0; rem--, i++) {
+		int pktsize;
+
+		if (pkt_is_type0(src[i]))
+			pktsize = type0_pkt_size(src[i]);
+
+		else if (pkt_is_type3(src[i]))
+			pktsize = type3_pkt_size(src[i]);
+
+		else if (pkt_is_type4(src[i]))
+			pktsize = type4_pkt_size(src[i]);
+
+		else if (pkt_is_type7(src[i]))
+			pktsize = type7_pkt_size(src[i]);
+
+		/*
+		 * If the packet isn't a type 0, type 3, type 4 or type 7 then
+		 * don't bother parsing it - it is likely corrupted
+		 */
+		else
+			break;
+
+		if (((pkt_is_type0(src[i]) || pkt_is_type3(src[i])) && !pktsize)
+			|| ((pktsize + 1) > rem))
+			break;
+
+		if (pkt_is_type3(src[i])) {
+			if (adreno_cmd_is_ib(adreno_dev, src[i])) {
+				uint64_t gpuaddrib2 = src[i + 1];
+				uint64_t size = src[i + 2];
+
+				ret = adreno_cp_parse_ib2(device, process,
+						gpuaddrib2, size,
+						ib_obj_list, ib_level);
+				if (ret)
+					goto done;
+			} else {
+				ret = ib_parse_type3(device, &src[i], process,
+						ib_obj_list,
+						&ib_parse_vars);
+				/*
+				 * If the parse function failed (probably
+				 * because of a bad decode) then bail out and
+				 * just capture the binary IB data
+				 */
+
+				if (ret)
+					goto done;
+			}
+		}
+
+		else if (pkt_is_type7(src[i])) {
+			if (adreno_cmd_is_ib(adreno_dev, src[i])) {
+				uint64_t size = src[i + 3];
+				uint64_t gpuaddrib2 = src[i + 2];
+
+				gpuaddrib2 = gpuaddrib2 << 32 | src[i + 1];
+
+				ret = adreno_cp_parse_ib2(device, process,
+						gpuaddrib2, size,
+						ib_obj_list, ib_level);
+				if (ret)
+					goto done;
+			} else {
+				ret = ib_parse_type7(device, &src[i], process,
+						ib_obj_list,
+						&ib_parse_vars);
+				/*
+				 * If the parse function failed (probably
+				 * because of a bad decode) then bail out and
+				 * just capture the binary IB data
+				 */
+
+				if (ret)
+					goto done;
+			}
+		}
+
+		else if (pkt_is_type0(src[i])) {
+			ret = ib_parse_type0(device, &src[i], process,
+					ib_obj_list, &ib_parse_vars);
+			if (ret)
+				goto done;
+		}
+
+		i += pktsize;
+		rem -= pktsize;
+	}
+
+done:
+	/*
+	 * For set draw objects there may not be a draw_indx packet at its end
+	 * to signal that we need to save the objects found in it, so just save
+	 * them here.
+	 */
+	if (!ret && SNAPSHOT_GPU_OBJECT_DRAW == obj_type)
+		ret = ib_add_type0_entries(device, process, ib_obj_list,
+			&ib_parse_vars);
+
+	kgsl_memdesc_unmap(&entry->memdesc);
+	kgsl_mem_entry_put(entry);
+	return ret;
+}
+
+
+/*
+ * adreno_ib_create_object_list() - Find all the memory objects in IB
+ * @device: The device pointer on which the IB executes
+ * @process: The process in which the IB and all contained objects are mapped
+ * @gpuaddr: The gpu address of the IB
+ * @dwords: Size of ib in dwords
+ * @out_ib_obj_list: Pointer through which the list of found objects is returned
+ *
+ * Find all the memory objects that an IB needs for execution and place
+ * them in a list including the IB.
+ * Returns 0 on success; on failure an error code is returned along with
+ * the objects that were found before the error occurred. If no objects are
+ * found then the list pointer is set to NULL.
+ */
+int adreno_ib_create_object_list(struct kgsl_device *device,
+		struct kgsl_process_private *process,
+		uint64_t gpuaddr, uint64_t dwords,
+		struct adreno_ib_object_list **out_ib_obj_list)
+{
+	int ret = 0;
+	struct adreno_ib_object_list *ib_obj_list;
+
+	if (!out_ib_obj_list)
+		return -EINVAL;
+
+	*out_ib_obj_list = NULL;
+
+	ib_obj_list = kzalloc(sizeof(*ib_obj_list), GFP_KERNEL);
+	if (!ib_obj_list)
+		return -ENOMEM;
+
+	ib_obj_list->obj_list = vmalloc(MAX_IB_OBJS *
+					sizeof(struct adreno_ib_object));
+
+	if (!ib_obj_list->obj_list) {
+		kfree(ib_obj_list);
+		return -ENOMEM;
+	}
+
+	ret = adreno_ib_find_objs(device, process, gpuaddr, dwords,
+		SNAPSHOT_GPU_OBJECT_IB, ib_obj_list, 1);
+
+	/* Even if there was an error return the remaining objects found */
+	if (ib_obj_list->num_objs)
+		*out_ib_obj_list = ib_obj_list;
+
+	return ret;
+}
+
+/*
+ * adreno_ib_destroy_obj_list() - Destroy an ib object list
+ * @ib_obj_list: List to destroy
+ *
+ * Free up all resources used by an ib_obj_list
+ */
+void adreno_ib_destroy_obj_list(struct adreno_ib_object_list *ib_obj_list)
+{
+	int i;
+
+	if (!ib_obj_list)
+		return;
+
+	for (i = 0; i < ib_obj_list->num_objs; i++) {
+		if (ib_obj_list->obj_list[i].entry)
+			kgsl_mem_entry_put(ib_obj_list->obj_list[i].entry);
+	}
+	vfree(ib_obj_list->obj_list);
+	kfree(ib_obj_list);
+}
diff --git a/drivers/gpu/msm/adreno_cp_parser.h b/drivers/gpu/msm/adreno_cp_parser.h
new file mode 100644
index 0000000..cdd983e
--- /dev/null
+++ b/drivers/gpu/msm/adreno_cp_parser.h
@@ -0,0 +1,187 @@
+/* Copyright (c) 2013-2014, 2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __ADRENO_IB_PARSER__
+#define __ADRENO_IB_PARSER__
+
+#include "adreno.h"
+
+extern const unsigned int a3xx_cp_addr_regs[];
+extern const unsigned int a4xx_cp_addr_regs[];
+
+/*
+ * struct adreno_ib_object - Structure containing information about an
+ * address range found in an IB
+ * @gpuaddr: The starting gpuaddress of the range
+ * @size: Size of the range
+ * @snapshot_obj_type: Type of range used in snapshot
+ * @entry: The memory entry in which this range is found
+ */
+struct adreno_ib_object {
+	uint64_t gpuaddr;
+	uint64_t size;
+	int snapshot_obj_type;
+	struct kgsl_mem_entry *entry;
+};
+
+/*
+ * struct adreno_ib_object_list - List of address ranges found in IB
+ * @obj_list: The address range list
+ * @num_objs: Number of objects in list
+ */
+struct adreno_ib_object_list {
+	struct adreno_ib_object *obj_list;
+	int num_objs;
+};
+
+/*
+ * Adreno registers used during IB parsing; these contain the addresses
+ * and sizes of buffers that are present in an IB
+ */
+enum adreno_cp_addr_regs {
+	ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_0 = 0,
+	ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_0,
+	ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_1,
+	ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_1,
+	ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_2,
+	ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_2,
+	ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_3,
+	ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_3,
+	ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_4,
+	ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_4,
+	ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_5,
+	ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_5,
+	ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_6,
+	ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_6,
+	ADRENO_CP_ADDR_VSC_PIPE_DATA_ADDRESS_7,
+	ADRENO_CP_ADDR_VSC_PIPE_DATA_LENGTH_7,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_0,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_1,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_2,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_3,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_4,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_5,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_6,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_7,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_8,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_9,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_10,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_11,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_12,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_13,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_14,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_15,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_16,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_17,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_18,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_19,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_20,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_21,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_22,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_23,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_24,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_25,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_26,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_27,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_28,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_29,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_30,
+	ADRENO_CP_ADDR_VFD_FETCH_INSTR_1_31,
+	ADRENO_CP_ADDR_VSC_SIZE_ADDRESS,
+	ADRENO_CP_ADDR_SP_VS_PVT_MEM_ADDR,
+	ADRENO_CP_ADDR_SP_FS_PVT_MEM_ADDR,
+	ADRENO_CP_ADDR_SP_VS_OBJ_START_REG,
+	ADRENO_CP_ADDR_SP_FS_OBJ_START_REG,
+	ADRENO_CP_UCHE_INVALIDATE0,
+	ADRENO_CP_UCHE_INVALIDATE1,
+	ADRENO_CP_ADDR_MAX,
+};
+
+/*
+ * adreno_ib_init_ib_obj() - Create an ib object structure and initialize it
+ * with gpuaddress and size
+ * @gpuaddr: gpuaddr with which to initialize the object
+ * @size: Size in bytes with which the object is initialized
+ * @obj_type: The object type used by snapshot
+ * @entry: The memory entry backing the address range
+ * @ib_obj: The ib object structure to initialize
+ */
+static inline void adreno_ib_init_ib_obj(uint64_t gpuaddr,
+			uint64_t size, int obj_type,
+			struct kgsl_mem_entry *entry,
+			struct adreno_ib_object *ib_obj)
+{
+	ib_obj->gpuaddr = gpuaddr;
+	ib_obj->size = size;
+	ib_obj->snapshot_obj_type = obj_type;
+	ib_obj->entry = entry;
+}
+
+/*
+ * adreno_cp_parser_getreg() - Returns the offset of the register indexed by reg_enum
+ * @adreno_dev: The adreno device being operated upon
+ * @reg_enum: Enum index of the register whose offset is returned
+ */
+static inline int adreno_cp_parser_getreg(struct adreno_device *adreno_dev,
+					enum adreno_cp_addr_regs reg_enum)
+{
+	if (reg_enum == ADRENO_CP_ADDR_MAX)
+		return -EEXIST;
+
+	if (adreno_is_a3xx(adreno_dev))
+		return a3xx_cp_addr_regs[reg_enum];
+	else if (adreno_is_a4xx(adreno_dev))
+		return a4xx_cp_addr_regs[reg_enum];
+	else
+		return -EEXIST;
+}
+
+/*
+ * adreno_cp_parser_regindex() - Returns enum index for a given register offset
+ * @adreno_dev: The adreno device being operated upon
+ * @offset: Register offset
+ * @start: The start index to search from
+ * @end: The last index to search
+ *
+ * Checks the list of registers defined for the device and returns the index
+ * whose offset value matches offset parameter.
+ */
+static inline int adreno_cp_parser_regindex(struct adreno_device *adreno_dev,
+				unsigned int offset,
+				enum adreno_cp_addr_regs start,
+				enum adreno_cp_addr_regs end)
+{
+	int i;
+	const unsigned int *regs;
+
+	if (adreno_is_a4xx(adreno_dev))
+		regs = a4xx_cp_addr_regs;
+	else if (adreno_is_a3xx(adreno_dev))
+		regs = a3xx_cp_addr_regs;
+	else
+		return -EEXIST;
+
+	for (i = start; i <= end && i < ADRENO_CP_ADDR_MAX; i++)
+		if (regs[i] == offset)
+			return i;
+	return -EEXIST;
+}
+
+int adreno_ib_create_object_list(
+		struct kgsl_device *device,
+		struct kgsl_process_private *process,
+		uint64_t gpuaddr, uint64_t dwords,
+		struct adreno_ib_object_list **out_ib_obj_list);
+
+void adreno_ib_destroy_obj_list(struct adreno_ib_object_list *ib_obj_list);
+
+#endif
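The lookup above maps a generation-independent enum onto per-generation offset tables (a3xx_cp_addr_regs / a4xx_cp_addr_regs, defined elsewhere in the driver) and returns -EEXIST for parts that have no such table. A minimal standalone sketch of the same pattern, using made-up offsets rather than real Adreno register values:

#include <stdio.h>
#include <errno.h>

enum cp_addr_regs { REG_VSC_SIZE_ADDRESS, REG_SP_VS_OBJ_START, REG_MAX };

/* Placeholder offsets only - not real Adreno register values */
static const unsigned int fake_a3xx_regs[REG_MAX] = { 0x100, 0x104 };

static int getreg(int is_a3xx, enum cp_addr_regs reg)
{
	if (reg >= REG_MAX || !is_a3xx)
		return -EEXIST;		/* unknown register or unsupported GPU */
	return fake_a3xx_regs[reg];
}

int main(void)
{
	printf("supported:   %d\n", getreg(1, REG_SP_VS_OBJ_START));
	printf("unsupported: %d\n", getreg(0, REG_SP_VS_OBJ_START));
	return 0;
}

Using a negative errno value such as -EEXIST as the "not present" sentinel keeps the result distinguishable from any valid non-negative offset.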
diff --git a/drivers/gpu/msm/adreno_debugfs.c b/drivers/gpu/msm/adreno_debugfs.c
new file mode 100644
index 0000000..f6c9805
--- /dev/null
+++ b/drivers/gpu/msm/adreno_debugfs.c
@@ -0,0 +1,388 @@
+/* Copyright (c) 2002,2008-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/export.h>
+#include <linux/delay.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/io.h>
+
+#include "kgsl.h"
+#include "adreno.h"
+#include "kgsl_sync.h"
+
+static int _isdb_set(void *data, u64 val)
+{
+	struct kgsl_device *device = data;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	/* Once ISDB is enabled it stays enabled */
+	if (test_bit(ADRENO_DEVICE_ISDB_ENABLED, &adreno_dev->priv))
+		return 0;
+
+	mutex_lock(&device->mutex);
+
+	/*
+	 * Bring down the GPU so we can bring it back up with the correct power
+	 * and clock settings
+	 */
+	kgsl_pwrctrl_change_state(device, KGSL_STATE_SUSPEND);
+	set_bit(ADRENO_DEVICE_ISDB_ENABLED, &adreno_dev->priv);
+	kgsl_pwrctrl_change_state(device, KGSL_STATE_SLUMBER);
+
+	mutex_unlock(&device->mutex);
+
+	return 0;
+}
+
+static int _isdb_get(void *data, u64 *val)
+{
+	struct kgsl_device *device = data;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	*val = (u64) test_bit(ADRENO_DEVICE_ISDB_ENABLED, &adreno_dev->priv);
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(_isdb_fops, _isdb_get, _isdb_set, "%llu\n");
+
+static int _lm_limit_set(void *data, u64 val)
+{
+	struct kgsl_device *device = data;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_LM))
+		return 0;
+
+	/* Clamp the value between 3 A (3000 mA) and 10 A (10000 mA) */
+	if (val > 10000)
+		val = 10000;
+	else if (val < 3000)
+		val = 3000;
+
+	adreno_dev->lm_limit = val;
+
+	if (test_bit(ADRENO_LM_CTRL, &adreno_dev->pwrctrl_flag)) {
+		mutex_lock(&device->mutex);
+		kgsl_pwrctrl_change_state(device, KGSL_STATE_SUSPEND);
+		kgsl_pwrctrl_change_state(device, KGSL_STATE_SLUMBER);
+		mutex_unlock(&device->mutex);
+	}
+
+	return 0;
+}
+
+static int _lm_limit_get(void *data, u64 *val)
+{
+	struct kgsl_device *device = data;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_LM)) {
+		*val = 0;
+		return 0;
+	}
+
+	*val = (u64) adreno_dev->lm_limit;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(_lm_limit_fops, _lm_limit_get, _lm_limit_set, "%llu\n");
+
+static int _lm_threshold_count_get(void *data, u64 *val)
+{
+	struct kgsl_device *device = data;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_LM))
+		*val = 0;
+	else
+		*val = (u64) adreno_dev->lm_threshold_cross;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(_lm_threshold_fops, _lm_threshold_count_get,
+	NULL, "%llu\n");
+
+static int _active_count_get(void *data, u64 *val)
+{
+	struct kgsl_device *device = data;
+	unsigned int i = atomic_read(&device->active_cnt);
+
+	*val = (u64) i;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(_active_count_fops, _active_count_get, NULL, "%llu\n");
+
+typedef void (*reg_read_init_t)(struct kgsl_device *device);
+typedef void (*reg_read_fill_t)(struct kgsl_device *device, int i,
+	unsigned int *vals, int linec);
+
+static void sync_event_print(struct seq_file *s,
+		struct kgsl_drawobj_sync_event *sync_event)
+{
+	switch (sync_event->type) {
+	case KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP: {
+		seq_printf(s, "sync: ctx: %d ts: %d",
+				sync_event->context->id, sync_event->timestamp);
+		break;
+	}
+	case KGSL_CMD_SYNCPOINT_TYPE_FENCE:
+		seq_printf(s, "sync: [%pK] %s", sync_event->handle,
+		(sync_event->handle && sync_event->handle->fence)
+				? sync_event->handle->fence->name : "NULL");
+		break;
+	default:
+		seq_printf(s, "sync: type: %d", sync_event->type);
+		break;
+	}
+}
+
+struct flag_entry {
+	unsigned long mask;
+	const char *str;
+};
+
+static const struct flag_entry drawobj_flags[] = {KGSL_DRAWOBJ_FLAGS};
+
+static const struct flag_entry cmdobj_priv[] = {
+	{ CMDOBJ_SKIP, "skip"},
+	{ CMDOBJ_FORCE_PREAMBLE, "force_preamble"},
+	{ CMDOBJ_WFI, "wait_for_idle" },
+};
+
+static const struct flag_entry context_flags[] = {KGSL_CONTEXT_FLAGS};
+
+/*
+ * Note that the ADRENO_CONTEXT_* flags start at
+ * KGSL_CONTEXT_PRIV_DEVICE_SPECIFIC so it is ok to cross the streams here.
+ */
+static const struct flag_entry context_priv[] = {
+	{ KGSL_CONTEXT_PRIV_DETACHED, "detached"},
+	{ KGSL_CONTEXT_PRIV_INVALID, "invalid"},
+	{ KGSL_CONTEXT_PRIV_PAGEFAULT, "pagefault"},
+	{ ADRENO_CONTEXT_FAULT, "fault"},
+	{ ADRENO_CONTEXT_GPU_HANG, "gpu_hang"},
+	{ ADRENO_CONTEXT_GPU_HANG_FT, "gpu_hang_ft"},
+	{ ADRENO_CONTEXT_SKIP_EOF, "skip_end_of_frame" },
+	{ ADRENO_CONTEXT_FORCE_PREAMBLE, "force_preamble"},
+};
+
+static void print_flags(struct seq_file *s, const struct flag_entry *table,
+			size_t table_size, unsigned long flags)
+{
+	int i;
+	int first = 1;
+
+	for (i = 0; i < table_size; i++) {
+		if (flags & table[i].mask) {
+			seq_printf(s, "%c%s", first ? '\0' : '|', table[i].str);
+			flags &= ~(table[i].mask);
+			first = 0;
+		}
+	}
+	if (flags) {
+		seq_printf(s, "%c0x%lx", first ? '\0' : '|', flags);
+		first = 0;
+	}
+	if (first)
+		seq_puts(s, "None");
+}
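print_flags() renders a bitmask as a '|'-separated list of names, appends any leftover unnamed bits in hex, and prints "None" when nothing is set. A standalone model of the same output logic (using an empty first separator instead of printing a NUL character through %c):

#include <stdio.h>

struct flag_entry_model { unsigned long mask; const char *str; };

static void print_flags_model(const struct flag_entry_model *tbl, size_t n,
			      unsigned long flags)
{
	const char *sep = "";
	size_t i;

	for (i = 0; i < n; i++) {
		if (flags & tbl[i].mask) {
			printf("%s%s", sep, tbl[i].str);
			flags &= ~tbl[i].mask;
			sep = "|";
		}
	}
	if (flags)
		printf("%s0x%lx", sep, flags);	/* leftover unnamed bits */
	else if (!sep[0])
		printf("None");			/* nothing was set at all */
	printf("\n");
}

int main(void)
{
	const struct flag_entry_model tbl[] = {
		{ 1UL << 0, "skip" },
		{ 1UL << 1, "force_preamble" },
	};

	/* prints "force_preamble|0x10" */
	print_flags_model(tbl, 2, (1UL << 1) | (1UL << 4));
	return 0;
}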
+
+static void syncobj_print(struct seq_file *s,
+			struct kgsl_drawobj_sync *syncobj)
+{
+	struct kgsl_drawobj_sync_event *event;
+	unsigned int i;
+
+	seq_puts(s, " syncobj ");
+
+	for (i = 0; i < syncobj->numsyncs; i++) {
+		event = &syncobj->synclist[i];
+
+		if (!kgsl_drawobj_event_pending(syncobj, i))
+			continue;
+
+		sync_event_print(s, event);
+		seq_puts(s, "\n");
+	}
+}
+
+static void cmdobj_print(struct seq_file *s,
+			struct kgsl_drawobj_cmd *cmdobj)
+{
+	struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+
+	if (drawobj->type == CMDOBJ_TYPE)
+		seq_puts(s, " cmdobj ");
+	else
+		seq_puts(s, " markerobj ");
+
+	seq_printf(s, "\t %d ", drawobj->timestamp);
+
+	seq_puts(s, " priv: ");
+	print_flags(s, cmdobj_priv, ARRAY_SIZE(cmdobj_priv),
+				cmdobj->priv);
+}
+
+static void drawobj_print(struct seq_file *s,
+			struct kgsl_drawobj *drawobj)
+{
+	if (drawobj->type == SYNCOBJ_TYPE)
+		syncobj_print(s, SYNCOBJ(drawobj));
+	else if ((drawobj->type == CMDOBJ_TYPE) ||
+			(drawobj->type == MARKEROBJ_TYPE))
+		cmdobj_print(s, CMDOBJ(drawobj));
+
+	seq_puts(s, " flags: ");
+	print_flags(s, drawobj_flags, ARRAY_SIZE(drawobj_flags),
+		    drawobj->flags);
+
+	seq_puts(s, "\n");
+}
+
+static const char *ctx_type_str(unsigned int type)
+{
+	int i;
+	struct flag_entry table[] = {KGSL_CONTEXT_TYPES};
+
+	for (i = 0; i < ARRAY_SIZE(table); i++)
+		if (type == table[i].mask)
+			return table[i].str;
+	return "UNKNOWN";
+}
+
+static int ctx_print(struct seq_file *s, void *unused)
+{
+	struct adreno_context *drawctxt = s->private;
+	unsigned int i;
+	struct kgsl_event *event;
+	unsigned int queued = 0, consumed = 0, retired = 0;
+
+	seq_printf(s, "id: %d type: %s priority: %d process: %s (%d) tid: %d\n",
+		   drawctxt->base.id,
+		   ctx_type_str(drawctxt->type),
+		   drawctxt->base.priority,
+		   drawctxt->base.proc_priv->comm,
+		   drawctxt->base.proc_priv->pid,
+		   drawctxt->base.tid);
+
+	seq_puts(s, "flags: ");
+	print_flags(s, context_flags, ARRAY_SIZE(context_flags),
+		    drawctxt->base.flags & ~(KGSL_CONTEXT_PRIORITY_MASK
+						| KGSL_CONTEXT_TYPE_MASK));
+	seq_puts(s, " priv: ");
+	print_flags(s, context_priv, ARRAY_SIZE(context_priv),
+			drawctxt->base.priv);
+	seq_puts(s, "\n");
+
+	seq_puts(s, "timestamps: ");
+	kgsl_readtimestamp(drawctxt->base.device, &drawctxt->base,
+				KGSL_TIMESTAMP_QUEUED, &queued);
+	kgsl_readtimestamp(drawctxt->base.device, &drawctxt->base,
+				KGSL_TIMESTAMP_CONSUMED, &consumed);
+	kgsl_readtimestamp(drawctxt->base.device, &drawctxt->base,
+				KGSL_TIMESTAMP_RETIRED, &retired);
+	seq_printf(s, "queued: %u consumed: %u retired: %u global:%u\n",
+		   queued, consumed, retired,
+		   drawctxt->internal_timestamp);
+
+	seq_puts(s, "drawqueue:\n");
+
+	spin_lock(&drawctxt->lock);
+	for (i = drawctxt->drawqueue_head;
+		i != drawctxt->drawqueue_tail;
+		i = DRAWQUEUE_NEXT(i, ADRENO_CONTEXT_DRAWQUEUE_SIZE))
+		drawobj_print(s, drawctxt->drawqueue[i]);
+	spin_unlock(&drawctxt->lock);
+
+	seq_puts(s, "events:\n");
+	spin_lock(&drawctxt->base.events.lock);
+	list_for_each_entry(event, &drawctxt->base.events.events, node)
+		seq_printf(s, "\t%d: %pF created: %u\n", event->timestamp,
+				event->func, event->created);
+	spin_unlock(&drawctxt->base.events.lock);
+
+	return 0;
+}
+
+static int ctx_open(struct inode *inode, struct file *file)
+{
+	int ret;
+	unsigned int id = (unsigned int)(unsigned long)inode->i_private;
+	struct kgsl_context *context;
+
+	context = kgsl_context_get(kgsl_get_device(KGSL_DEVICE_3D0), id);
+	if (context == NULL)
+		return -ENODEV;
+
+	ret = single_open(file, ctx_print, context);
+	if (ret)
+		kgsl_context_put(context);
+	return ret;
+}
+
+static int ctx_release(struct inode *inode, struct file *file)
+{
+	struct kgsl_context *context;
+
+	context = ((struct seq_file *)file->private_data)->private;
+
+	kgsl_context_put(context);
+
+	return single_release(inode, file);
+}
+
+static const struct file_operations ctx_fops = {
+	.open = ctx_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = ctx_release,
+};
+
+void
+adreno_context_debugfs_init(struct adreno_device *adreno_dev,
+			    struct adreno_context *ctx)
+{
+	char name[16];
+
+	snprintf(name, sizeof(name), "%d", ctx->base.id);
+
+	ctx->debug_root = debugfs_create_file(name, 0444,
+				adreno_dev->ctx_d_debugfs,
+				(void *)(unsigned long)ctx->base.id, &ctx_fops);
+}
+
+void adreno_debugfs_init(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (!device->d_debugfs || IS_ERR(device->d_debugfs))
+		return;
+
+	debugfs_create_file("active_cnt", 0444, device->d_debugfs, device,
+			    &_active_count_fops);
+	adreno_dev->ctx_d_debugfs = debugfs_create_dir("ctx",
+							device->d_debugfs);
+
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_LM)) {
+		debugfs_create_file("lm_limit", 0644, device->d_debugfs, device,
+			&_lm_limit_fops);
+		debugfs_create_file("lm_threshold_count", 0444,
+			device->d_debugfs, device, &_lm_threshold_fops);
+	}
+
+	if (adreno_is_a5xx(adreno_dev))
+		debugfs_create_file("isdb", 0644, device->d_debugfs,
+			device, &_isdb_fops);
+}
diff --git a/drivers/gpu/msm/adreno_dispatch.c b/drivers/gpu/msm/adreno_dispatch.c
new file mode 100644
index 0000000..7ad94b8
--- /dev/null
+++ b/drivers/gpu/msm/adreno_dispatch.c
@@ -0,0 +1,2674 @@
+/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/wait.h>
+#include <linux/delay.h>
+#include <linux/sched.h>
+#include <linux/jiffies.h>
+#include <linux/err.h>
+
+#include "kgsl.h"
+#include "kgsl_sharedmem.h"
+#include "adreno.h"
+#include "adreno_ringbuffer.h"
+#include "adreno_trace.h"
+#include "kgsl_sharedmem.h"
+
+#define DRAWQUEUE_NEXT(_i, _s) (((_i) + 1) % (_s))
+
+/* Time in ms after which the dispatcher tries to schedule an unscheduled RB */
+unsigned int adreno_dispatch_starvation_time = 2000;
+
+/* Amount of time in ms that a starved RB is permitted to execute for */
+unsigned int adreno_dispatch_time_slice = 25;
+
+/*
+ * If set, the dispatcher tries to schedule lower priority RBs if they have
+ * commands in their pipe and have been inactive for
+ * adreno_dispatch_starvation_time. Also, once an RB is scheduled it will be
+ * allowed to run for adreno_dispatch_time_slice unless its commands complete
+ * sooner.
+ */
+unsigned int adreno_disp_preempt_fair_sched;
+
+/* Number of commands that can be queued in a context before it sleeps */
+static unsigned int _context_drawqueue_size = 50;
+
+/* Number of milliseconds to wait for the context queue to clear */
+static unsigned int _context_queue_wait = 10000;
+
+/* Number of drawobjs sent at a time from a single context */
+static unsigned int _context_drawobj_burst = 5;
+
+/*
+ * GFT throttle parameters. If GFT recovered more than
+ * X times in Y ms invalidate the context and do not attempt recovery.
+ * X -> _fault_throttle_burst
+ * Y -> _fault_throttle_time
+ */
+static unsigned int _fault_throttle_time = 3000;
+static unsigned int _fault_throttle_burst = 3;
+
+/*
+ * Maximum ringbuffer inflight for the single submitting context case - this
+ * should be sufficiently high to keep the GPU loaded
+ */
+static unsigned int _dispatcher_q_inflight_hi = 15;
+
+/*
+ * Minimum inflight for the multiple context case - this should be sufficiently
+ * low to allow for lower latency context switching
+ */
+static unsigned int _dispatcher_q_inflight_lo = 4;
+
+/* Command batch timeout (in milliseconds) */
+unsigned int adreno_drawobj_timeout = 2000;
+
+/* Interval for reading and comparing fault detection registers */
+static unsigned int _fault_timer_interval = 200;
+
+#define DRAWQUEUE_RB(_drawqueue) \
+	((struct adreno_ringbuffer *) \
+		container_of((_drawqueue),\
+		struct adreno_ringbuffer, dispatch_q))
+
+#define DRAWQUEUE(_ringbuffer) (&(_ringbuffer)->dispatch_q)
+
+static int adreno_dispatch_retire_drawqueue(struct adreno_device *adreno_dev,
+		struct adreno_dispatcher_drawqueue *drawqueue);
+
+static inline bool drawqueue_is_current(
+		struct adreno_dispatcher_drawqueue *drawqueue)
+{
+	struct adreno_ringbuffer *rb = DRAWQUEUE_RB(drawqueue);
+	struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
+
+	return (adreno_dev->cur_rb == rb);
+}
+
+static void _add_context(struct adreno_device *adreno_dev,
+		struct adreno_context *drawctxt)
+{
+	/* Remove it from the list */
+	list_del_init(&drawctxt->active_node);
+
+	/* And push it to the front */
+	drawctxt->active_time = jiffies;
+	list_add(&drawctxt->active_node, &adreno_dev->active_list);
+}
+
+static int __count_context(struct adreno_context *drawctxt, void *data)
+{
+	unsigned long expires = drawctxt->active_time + msecs_to_jiffies(100);
+
+	return time_after(jiffies, expires) ? 0 : 1;
+}
+
+static int __count_drawqueue_context(struct adreno_context *drawctxt,
+				void *data)
+{
+	unsigned long expires = drawctxt->active_time + msecs_to_jiffies(100);
+
+	if (time_after(jiffies, expires))
+		return 0;
+
+	return (&drawctxt->rb->dispatch_q ==
+			(struct adreno_dispatcher_drawqueue *) data) ? 1 : 0;
+}
+
+static int _adreno_count_active_contexts(struct adreno_device *adreno_dev,
+		int (*func)(struct adreno_context *, void *), void *data)
+{
+	struct adreno_context *ctxt;
+	int count = 0;
+
+	list_for_each_entry(ctxt, &adreno_dev->active_list, active_node) {
+		if (func(ctxt, data) == 0)
+			return count;
+
+		count++;
+	}
+
+	return count;
+}
+
+static void _track_context(struct adreno_device *adreno_dev,
+		struct adreno_dispatcher_drawqueue *drawqueue,
+		struct adreno_context *drawctxt)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	spin_lock(&adreno_dev->active_list_lock);
+
+	_add_context(adreno_dev, drawctxt);
+
+	device->active_context_count =
+			_adreno_count_active_contexts(adreno_dev,
+					__count_context, NULL);
+	drawqueue->active_context_count =
+			_adreno_count_active_contexts(adreno_dev,
+					__count_drawqueue_context, drawqueue);
+
+	spin_unlock(&adreno_dev->active_list_lock);
+}
+
+/*
+ *  If only one context has queued in the last 100 milliseconds increase
+ *  inflight to a high number to load up the GPU. If multiple contexts
+ *  have queued drop the inflight for better context switch latency.
+ *  If no contexts have queued what are you even doing here?
+ */
+
+static inline int
+_drawqueue_inflight(struct adreno_dispatcher_drawqueue *drawqueue)
+{
+	return (drawqueue->active_context_count > 1)
+		? _dispatcher_q_inflight_lo : _dispatcher_q_inflight_hi;
+}
+
+static void fault_detect_read(struct adreno_device *adreno_dev)
+{
+	int i;
+
+	if (!test_bit(ADRENO_DEVICE_SOFT_FAULT_DETECT, &adreno_dev->priv))
+		return;
+
+	for (i = 0; i < adreno_dev->num_ringbuffers; i++) {
+		struct adreno_ringbuffer *rb = &(adreno_dev->ringbuffers[i]);
+
+		adreno_rb_readtimestamp(adreno_dev, rb,
+			KGSL_TIMESTAMP_RETIRED, &(rb->fault_detect_ts));
+	}
+
+	for (i = 0; i < adreno_ft_regs_num; i++) {
+		if (adreno_ft_regs[i] != 0)
+			kgsl_regread(KGSL_DEVICE(adreno_dev), adreno_ft_regs[i],
+				&adreno_ft_regs_val[i]);
+	}
+}
+
+/*
+ * Check to see if the device is idle
+ */
+static inline bool _isidle(struct adreno_device *adreno_dev)
+{
+	const struct adreno_gpu_core *gpucore = adreno_dev->gpucore;
+	unsigned int reg_rbbm_status;
+
+	if (!kgsl_state_is_awake(KGSL_DEVICE(adreno_dev)))
+		goto ret;
+
+	/* only check rbbm status to determine if GPU is idle */
+	adreno_readreg(adreno_dev, ADRENO_REG_RBBM_STATUS, &reg_rbbm_status);
+
+	if (reg_rbbm_status & gpucore->busy_mask)
+		return false;
+
+ret:
+	/* Clear the existing register values */
+	memset(adreno_ft_regs_val, 0,
+		adreno_ft_regs_num * sizeof(unsigned int));
+
+	return true;
+}
+
+/**
+ * fault_detect_read_compare() - Read the fault detect registers and compare
+ * them to the previously stored values
+ * @adreno_dev: Pointer to the adreno device struct
+ *
+ * Read the set of fault detect registers and compare them to the current set
+ * of registers.  Return 1 if any of the register values changed or if the
+ * current RB's retired timestamp has changed.
+ */
+static int fault_detect_read_compare(struct adreno_device *adreno_dev)
+{
+	struct adreno_ringbuffer *rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev);
+	int i, ret = 0;
+	unsigned int ts;
+
+	/* Check to see if the device is idle - if so report no hang */
+	if (_isidle(adreno_dev))
+		ret = 1;
+
+	for (i = 0; i < adreno_ft_regs_num; i++) {
+		unsigned int val;
+
+		if (adreno_ft_regs[i] == 0)
+			continue;
+		kgsl_regread(KGSL_DEVICE(adreno_dev), adreno_ft_regs[i], &val);
+		if (val != adreno_ft_regs_val[i])
+			ret = 1;
+		adreno_ft_regs_val[i] = val;
+	}
+
+	if (!adreno_rb_readtimestamp(adreno_dev, adreno_dev->cur_rb,
+				KGSL_TIMESTAMP_RETIRED, &ts)) {
+		if (ts != rb->fault_detect_ts)
+			ret = 1;
+
+		rb->fault_detect_ts = ts;
+	}
+
+	return ret;
+}
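The soft fault detection above amounts to "suspect a hang only if none of the sampled values moved between two polls"; an idle GPU and a moving retired timestamp both count as progress. A reduced standalone model of the snapshot comparison step:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define NREGS 4

struct snapshot { unsigned int vals[NREGS]; };

/* Returns true if anything moved since the previous poll, i.e. progress. */
static bool poll_progress(struct snapshot *last, const struct snapshot *cur)
{
	bool changed = memcmp(last->vals, cur->vals, sizeof(last->vals)) != 0;

	*last = *cur;		/* remember this poll for the next comparison */
	return changed;
}

int main(void)
{
	struct snapshot last = { { 1, 2, 3, 4 } };
	struct snapshot same = last;
	struct snapshot moved = { { 1, 2, 3, 5 } };

	printf("%d\n", poll_progress(&last, &same));	/* 0: possible hang */
	printf("%d\n", poll_progress(&last, &moved));	/* 1: progress */
	return 0;
}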
+
+static void start_fault_timer(struct adreno_device *adreno_dev)
+{
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+
+	if (adreno_soft_fault_detect(adreno_dev))
+		mod_timer(&dispatcher->fault_timer,
+			jiffies + msecs_to_jiffies(_fault_timer_interval));
+}
+
+/**
+ * _retire_timestamp() - Retire object without sending it
+ * to the hardware
+ * @drawobj: Pointer to the object to retire
+ *
+ * In some cases ibs can be retired by the software
+ * without going to the GPU.  In those cases, update the
+ * memstore from the CPU, kick off the event engine to handle
+ * expired events and destroy the ib.
+ */
+static void _retire_timestamp(struct kgsl_drawobj *drawobj)
+{
+	struct kgsl_context *context = drawobj->context;
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(context);
+	struct kgsl_device *device = context->device;
+
+	/*
+	 * Write the start and end timestamp to the memstore to keep the
+	 * accounting sane
+	 */
+	kgsl_sharedmem_writel(device, &device->memstore,
+		KGSL_MEMSTORE_OFFSET(context->id, soptimestamp),
+		drawobj->timestamp);
+
+	kgsl_sharedmem_writel(device, &device->memstore,
+		KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp),
+		drawobj->timestamp);
+
+	/* Retire pending GPU events for the object */
+	kgsl_process_event_group(device, &context->events);
+
+	/*
+	 * For A3xx we still get the rptr from the CP_RB_RPTR register instead
+	 * of the rptr scratch address. At this point the GPU clocks are turned
+	 * off, so avoid reading GPU registers directly for A3xx.
+	 */
+	if (adreno_is_a3xx(ADRENO_DEVICE(device)))
+		trace_adreno_cmdbatch_retired(drawobj, -1, 0, 0, drawctxt->rb,
+				0, 0);
+	else
+		trace_adreno_cmdbatch_retired(drawobj, -1, 0, 0, drawctxt->rb,
+			adreno_get_rptr(drawctxt->rb), 0);
+	kgsl_drawobj_destroy(drawobj);
+}
+
+static int _check_context_queue(struct adreno_context *drawctxt)
+{
+	int ret;
+
+	spin_lock(&drawctxt->lock);
+
+	/*
+	 * Wake up if there is room in the context or if the whole thing got
+	 * invalidated while we were asleep
+	 */
+
+	if (kgsl_context_invalid(&drawctxt->base))
+		ret = 1;
+	else
+		ret = drawctxt->queued < _context_drawqueue_size ? 1 : 0;
+
+	spin_unlock(&drawctxt->lock);
+
+	return ret;
+}
+
+/*
+ * return true if this is a marker command and the dependent timestamp has
+ * retired
+ */
+static bool _marker_expired(struct kgsl_drawobj_cmd *markerobj)
+{
+	struct kgsl_drawobj *drawobj = DRAWOBJ(markerobj);
+
+	return (drawobj->flags & KGSL_DRAWOBJ_MARKER) &&
+		kgsl_check_timestamp(drawobj->device, drawobj->context,
+			markerobj->marker_timestamp);
+}
+
+static inline void _pop_drawobj(struct adreno_context *drawctxt)
+{
+	drawctxt->drawqueue_head = DRAWQUEUE_NEXT(drawctxt->drawqueue_head,
+		ADRENO_CONTEXT_DRAWQUEUE_SIZE);
+	drawctxt->queued--;
+}
+
+static int _retire_markerobj(struct kgsl_drawobj_cmd *cmdobj,
+				struct adreno_context *drawctxt)
+{
+	if (_marker_expired(cmdobj)) {
+		_pop_drawobj(drawctxt);
+		_retire_timestamp(DRAWOBJ(cmdobj));
+		return 0;
+	}
+
+	/*
+	 * If the marker isn't expired but the SKIP bit
+	 * is set then there are real commands following
+	 * this one in the queue. This means that we
+	 * need to dispatch the command so that we can
+	 * keep the timestamp accounting correct. If
+	 * skip isn't set then we block this queue
+	 * until the dependent timestamp expires
+	 */
+	return test_bit(CMDOBJ_SKIP, &cmdobj->priv) ? 1 : -EAGAIN;
+}
+
+static int _retire_syncobj(struct kgsl_drawobj_sync *syncobj,
+				struct adreno_context *drawctxt)
+{
+	if (!kgsl_drawobj_events_pending(syncobj)) {
+		_pop_drawobj(drawctxt);
+		kgsl_drawobj_destroy(DRAWOBJ(syncobj));
+		return 0;
+	}
+
+	/*
+	 * If we got here, there are pending events for the sync object.
+	 * Start the canary timer if it hasn't been started already.
+	 */
+	if (!syncobj->timeout_jiffies) {
+		syncobj->timeout_jiffies = jiffies + msecs_to_jiffies(5000);
+		mod_timer(&syncobj->timer, syncobj->timeout_jiffies);
+	}
+
+	return -EAGAIN;
+}
+
+/*
+ * Retires all expired marker and sync objs from the context
+ * queue and returns one of the below:
+ * a) the next drawobj that needs to be sent to the ringbuffer
+ * b) -EAGAIN for a syncobj with syncpoints pending
+ * c) -EAGAIN for a markerobj whose marker timestamp has not expired yet
+ * d) NULL when no commands remain in the drawqueue
+ */
+static struct kgsl_drawobj *_process_drawqueue_get_next_drawobj(
+				struct adreno_context *drawctxt)
+{
+	struct kgsl_drawobj *drawobj;
+	unsigned int i = drawctxt->drawqueue_head;
+	int ret = 0;
+
+	if (drawctxt->drawqueue_head == drawctxt->drawqueue_tail)
+		return NULL;
+
+	for (i = drawctxt->drawqueue_head; i != drawctxt->drawqueue_tail;
+			i = DRAWQUEUE_NEXT(i, ADRENO_CONTEXT_DRAWQUEUE_SIZE)) {
+
+		drawobj = drawctxt->drawqueue[i];
+
+		if (drawobj == NULL)
+			return NULL;
+
+		if (drawobj->type == CMDOBJ_TYPE)
+			return drawobj;
+		else if (drawobj->type == MARKEROBJ_TYPE) {
+			ret = _retire_markerobj(CMDOBJ(drawobj), drawctxt);
+			/* Special case where marker needs to be sent to GPU */
+			if (ret == 1)
+				return drawobj;
+		} else if (drawobj->type == SYNCOBJ_TYPE)
+			ret = _retire_syncobj(SYNCOBJ(drawobj), drawctxt);
+
+		if (ret == -EAGAIN)
+			return ERR_PTR(-EAGAIN);
+
+		continue;
+	}
+
+	return NULL;
+}
+
+/**
+ * adreno_dispatcher_requeue_cmdobj() - Put a command back on the context
+ * queue
+ * @drawctxt: Pointer to the adreno draw context
+ * @cmdobj: Pointer to the KGSL command object to requeue
+ *
+ * Failure to submit a command to the ringbuffer isn't the fault of the command
+ * being submitted, so if a failure happens, push it back on the head of the
+ * context queue to be reconsidered again unless the context got detached.
+ */
+static inline int adreno_dispatcher_requeue_cmdobj(
+		struct adreno_context *drawctxt,
+		struct kgsl_drawobj_cmd *cmdobj)
+{
+	unsigned int prev;
+	struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+
+	spin_lock(&drawctxt->lock);
+
+	if (kgsl_context_detached(&drawctxt->base) ||
+		kgsl_context_invalid(&drawctxt->base)) {
+		spin_unlock(&drawctxt->lock);
+		/* get rid of this drawobj since the context is bad */
+		kgsl_drawobj_destroy(drawobj);
+		return -ENOENT;
+	}
+
+	prev = drawctxt->drawqueue_head == 0 ?
+		(ADRENO_CONTEXT_DRAWQUEUE_SIZE - 1) :
+		(drawctxt->drawqueue_head - 1);
+
+	/*
+	 * The maximum queue size always needs to be one less than the size of
+	 * the ringbuffer queue so there is "room" to put the drawobj back in
+	 */
+
+	WARN_ON(prev == drawctxt->drawqueue_tail);
+
+	drawctxt->drawqueue[prev] = drawobj;
+	drawctxt->queued++;
+
+	/* Reset the command queue head to reflect the newly requeued change */
+	drawctxt->drawqueue_head = prev;
+	spin_unlock(&drawctxt->lock);
+	return 0;
+}
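The per-context drawqueue is a plain head/tail ring in which head == tail means empty, so a ring of size N holds at most N - 1 entries; the requeue path above pushes the command back at the slot just before head and relies on the queue-size limit (_context_drawqueue_size being well below ADRENO_CONTEXT_DRAWQUEUE_SIZE) to guarantee that slot is free. A minimal standalone model of the index arithmetic:

#include <assert.h>
#include <stdio.h>

#define QSIZE 8			/* stands in for ADRENO_CONTEXT_DRAWQUEUE_SIZE */
#define QNEXT(i) (((i) + 1) % QSIZE)
#define QPREV(i) ((i) == 0 ? QSIZE - 1 : (i) - 1)

struct ring {
	int slot[QSIZE];
	unsigned int head, tail;	/* head == tail means empty */
};

/* Queue at the tail; usable capacity is QSIZE - 1 so "full" != "empty". */
static int ring_push(struct ring *r, int obj)
{
	if (QNEXT(r->tail) == r->head)
		return -1;		/* full */
	r->slot[r->tail] = obj;
	r->tail = QNEXT(r->tail);
	return 0;
}

/* Requeue at the head, as adreno_dispatcher_requeue_cmdobj() does. */
static void ring_push_front(struct ring *r, int obj)
{
	unsigned int prev = QPREV(r->head);

	assert(prev != r->tail);	/* caller guarantees there is room */
	r->slot[prev] = obj;
	r->head = prev;
}

int main(void)
{
	struct ring r = { .head = 0, .tail = 0 };

	ring_push(&r, 1);
	ring_push(&r, 2);
	ring_push_front(&r, 0);	/* 0 will now be dequeued before 1 and 2 */
	printf("head slot: %d\n", r.slot[r.head]);
	return 0;
}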
+
+/**
+ * dispatcher_queue_context() - Queue a context in the dispatcher pending list
+ * @adreno_dev: Pointer to the adreno device struct
+ * @drawctxt: Pointer to the adreno draw context
+ *
+ * Add a context to the dispatcher pending list.
+ */
+static void dispatcher_queue_context(struct adreno_device *adreno_dev,
+		struct adreno_context *drawctxt)
+{
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+
+	/* Refuse to queue a detached context */
+	if (kgsl_context_detached(&drawctxt->base))
+		return;
+
+	spin_lock(&dispatcher->plist_lock);
+
+	if (plist_node_empty(&drawctxt->pending)) {
+		/* Get a reference to the context while it sits on the list */
+		if (_kgsl_context_get(&drawctxt->base)) {
+			trace_dispatch_queue_context(drawctxt);
+			plist_add(&drawctxt->pending, &dispatcher->pending);
+		}
+	}
+
+	spin_unlock(&dispatcher->plist_lock);
+}
+
+/**
+ * sendcmd() - Send a drawobj to the GPU hardware
+ * @adreno_dev: Pointer to the adreno device struct
+ * @cmdobj: Pointer to the KGSL command object being sent
+ *
+ * Send a KGSL drawobj to the GPU hardware
+ */
+static int sendcmd(struct adreno_device *adreno_dev,
+	struct kgsl_drawobj_cmd *cmdobj)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context);
+	struct adreno_dispatcher_drawqueue *dispatch_q =
+				ADRENO_DRAWOBJ_DISPATCH_DRAWQUEUE(drawobj);
+	struct adreno_submit_time time;
+	uint64_t secs = 0;
+	unsigned long nsecs = 0;
+	int ret;
+
+	mutex_lock(&device->mutex);
+	if (adreno_gpu_halt(adreno_dev) != 0) {
+		mutex_unlock(&device->mutex);
+		return -EBUSY;
+	}
+
+	dispatcher->inflight++;
+	dispatch_q->inflight++;
+
+	if (dispatcher->inflight == 1 &&
+			!test_bit(ADRENO_DISPATCHER_POWER, &dispatcher->priv)) {
+		/* Time to make the donuts.  Turn on the GPU */
+		ret = kgsl_active_count_get(device);
+		if (ret) {
+			dispatcher->inflight--;
+			dispatch_q->inflight--;
+			mutex_unlock(&device->mutex);
+			return ret;
+		}
+
+		set_bit(ADRENO_DISPATCHER_POWER, &dispatcher->priv);
+	}
+
+	if (test_bit(ADRENO_DEVICE_DRAWOBJ_PROFILE, &adreno_dev->priv)) {
+		set_bit(CMDOBJ_PROFILE, &cmdobj->priv);
+		cmdobj->profile_index = adreno_dev->profile_index;
+		adreno_dev->profile_index =
+			(adreno_dev->profile_index + 1) %
+			ADRENO_DRAWOBJ_PROFILE_COUNT;
+	}
+
+	ret = adreno_ringbuffer_submitcmd(adreno_dev, cmdobj, &time);
+
+	/*
+	 * On the first command, if the submission was successful, then read the
+	 * fault registers.  If it failed then turn off the GPU. Sad face.
+	 */
+
+	if (dispatcher->inflight == 1) {
+		if (ret == 0) {
+
+			/* Stop fault timer before reading fault registers */
+			del_timer_sync(&dispatcher->fault_timer);
+
+			fault_detect_read(adreno_dev);
+
+			/* Start the fault timer on first submission */
+			start_fault_timer(adreno_dev);
+
+			if (!test_and_set_bit(ADRENO_DISPATCHER_ACTIVE,
+				&dispatcher->priv))
+				reinit_completion(&dispatcher->idle_gate);
+		} else {
+			kgsl_active_count_put(device);
+			clear_bit(ADRENO_DISPATCHER_POWER, &dispatcher->priv);
+		}
+	}
+
+	if (ret) {
+		dispatcher->inflight--;
+		dispatch_q->inflight--;
+
+		mutex_unlock(&device->mutex);
+
+		/*
+		 * Don't log a message in case of:
+		 * -ENOENT means that the context was detached before the
+		 * command was submitted
+		 * -ENOSPC means that there temporarily isn't any room in the
+		 *  ringbuffer
+		 * -EPROTO means that a fault is currently being worked on
+		 */
+
+		if (ret != -ENOENT && ret != -ENOSPC && ret != -EPROTO)
+			KGSL_DRV_ERR(device,
+				"Unable to submit command to the ringbuffer %d\n",
+				ret);
+		return ret;
+	}
+
+	secs = time.ktime;
+	nsecs = do_div(secs, 1000000000);
+
+	trace_adreno_cmdbatch_submitted(drawobj, (int) dispatcher->inflight,
+		time.ticks, (unsigned long) secs, nsecs / 1000, drawctxt->rb,
+		adreno_get_rptr(drawctxt->rb));
+
+	mutex_unlock(&device->mutex);
+
+	cmdobj->submit_ticks = time.ticks;
+
+	dispatch_q->cmd_q[dispatch_q->tail] = cmdobj;
+	dispatch_q->tail = (dispatch_q->tail + 1) %
+		ADRENO_DISPATCH_DRAWQUEUE_SIZE;
+
+	/*
+	 * For the first submission in any given command queue update the
+	 * expected expire time - this won't actually be used / updated until
+	 * the command queue in question goes current, but universally setting
+	 * it here avoids the possibility of some race conditions with preempt
+	 */
+
+	if (dispatch_q->inflight == 1)
+		dispatch_q->expires = jiffies +
+			msecs_to_jiffies(adreno_drawobj_timeout);
+
+	/*
+	 * If we believe ourselves to be current and preemption isn't a thing,
+	 * then set up the timer.  If this misses, then preemption is indeed a
+	 * thing and the timer will be set up in due time
+	 */
+	if (!adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE)) {
+		if (drawqueue_is_current(dispatch_q))
+			mod_timer(&dispatcher->timer, dispatch_q->expires);
+	}
+
+	/*
+	 * we just submitted something, readjust ringbuffer
+	 * execution level
+	 */
+	if (gpudev->preemption_schedule)
+		gpudev->preemption_schedule(adreno_dev);
+	return 0;
+}
+
+/**
+ * dispatcher_context_sendcmds() - Send commands from a context to the GPU
+ * @adreno_dev: Pointer to the adreno device struct
+ * @drawctxt: Pointer to the adreno context to dispatch commands from
+ *
+ * Dequeue and send a burst of commands from the specified context to the GPU
+ * Returns positive if the context needs to be put back on the pending queue,
+ * 0 if the context is empty or detached, and negative on error
+ */
+static int dispatcher_context_sendcmds(struct adreno_device *adreno_dev,
+		struct adreno_context *drawctxt)
+{
+	struct adreno_dispatcher_drawqueue *dispatch_q =
+					&(drawctxt->rb->dispatch_q);
+	int count = 0;
+	int ret = 0;
+	int inflight = _drawqueue_inflight(dispatch_q);
+	unsigned int timestamp;
+
+	if (dispatch_q->inflight >= inflight) {
+		spin_lock(&drawctxt->lock);
+		_process_drawqueue_get_next_drawobj(drawctxt);
+		spin_unlock(&drawctxt->lock);
+		return -EBUSY;
+	}
+
+	/*
+	 * Each context can send a specific number of drawobjs per cycle
+	 */
+	while ((count < _context_drawobj_burst) &&
+		(dispatch_q->inflight < inflight)) {
+		struct kgsl_drawobj *drawobj;
+		struct kgsl_drawobj_cmd *cmdobj;
+
+		if (adreno_gpu_fault(adreno_dev) != 0)
+			break;
+
+		spin_lock(&drawctxt->lock);
+		drawobj = _process_drawqueue_get_next_drawobj(drawctxt);
+
+		/*
+		 * _process_drawqueue_get_next_drawobj() returns -EAGAIN if the
+		 * current drawobj has pending sync points, so there is no more
+		 * to do here. When the sync points are satisfied the context
+		 * will get requeued.
+		 */
+
+		if (IS_ERR_OR_NULL(drawobj)) {
+			if (IS_ERR(drawobj))
+				ret = PTR_ERR(drawobj);
+			spin_unlock(&drawctxt->lock);
+			break;
+		}
+		_pop_drawobj(drawctxt);
+		spin_unlock(&drawctxt->lock);
+
+		timestamp = drawobj->timestamp;
+		cmdobj = CMDOBJ(drawobj);
+		ret = sendcmd(adreno_dev, cmdobj);
+
+		/*
+		 * On error from sendcmd() try to requeue the cmdobj
+		 * unless we got back -ENOENT which means that the context has
+		 * been detached and there will be no more deliveries from here
+		 */
+		if (ret != 0) {
+			/* Destroy the cmdobj on -ENOENT */
+			if (ret == -ENOENT)
+				kgsl_drawobj_destroy(drawobj);
+			else {
+				/*
+				 * If the requeue returns an error, return that
+				 * instead of whatever sendcmd() sent us
+				 */
+				int r = adreno_dispatcher_requeue_cmdobj(
+					drawctxt, cmdobj);
+				if (r)
+					ret = r;
+			}
+
+			break;
+		}
+
+		drawctxt->submitted_timestamp = timestamp;
+
+		count++;
+	}
+
+	/*
+	 * Wake up any snoozing threads if we have consumed any real commands
+	 * or marker commands and we have room in the context queue.
+	 */
+
+	if (_check_context_queue(drawctxt))
+		wake_up_all(&drawctxt->wq);
+
+	if (!ret)
+		ret = count;
+
+	/* Return error or the number of commands queued */
+	return ret;
+}
+
+/**
+ * _adreno_dispatcher_issuecmds() - Issue commands from pending contexts
+ * @adreno_dev: Pointer to the adreno device struct
+ *
+ * Issue as many commands as possible (up to inflight) from the pending contexts
+ * This function assumes the dispatcher mutex has been locked.
+ */
+static void _adreno_dispatcher_issuecmds(struct adreno_device *adreno_dev)
+{
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+	struct adreno_context *drawctxt, *next;
+	struct plist_head requeue, busy_list;
+	int ret;
+
+	/* Leave early if the dispatcher isn't in a happy state */
+	if (adreno_gpu_fault(adreno_dev) != 0)
+		return;
+
+	plist_head_init(&requeue);
+	plist_head_init(&busy_list);
+
+	/* Try to fill the ringbuffers as much as possible */
+	while (1) {
+
+		/* Stop doing things if the dispatcher is paused or faulted */
+		if (adreno_gpu_fault(adreno_dev) != 0)
+			break;
+
+		if (adreno_gpu_halt(adreno_dev) != 0)
+			break;
+
+		spin_lock(&dispatcher->plist_lock);
+
+		if (plist_head_empty(&dispatcher->pending)) {
+			spin_unlock(&dispatcher->plist_lock);
+			break;
+		}
+
+		/* Get the next entry on the list */
+		drawctxt = plist_first_entry(&dispatcher->pending,
+			struct adreno_context, pending);
+
+		plist_del(&drawctxt->pending, &dispatcher->pending);
+
+		spin_unlock(&dispatcher->plist_lock);
+
+		if (kgsl_context_detached(&drawctxt->base) ||
+			kgsl_context_invalid(&drawctxt->base)) {
+			kgsl_context_put(&drawctxt->base);
+			continue;
+		}
+
+		ret = dispatcher_context_sendcmds(adreno_dev, drawctxt);
+
+		/* Don't bother requeuing on -ENOENT - context is detached */
+		if (ret != 0 && ret != -ENOENT) {
+			spin_lock(&dispatcher->plist_lock);
+
+			/*
+			 * Check to see if the context had been requeued while
+			 * we were processing it (probably by another thread
+			 * pushing commands). If it has then shift it to the
+			 * requeue list if it was not able to submit commands
+			 * due to the dispatch_q being full. Also, do a put to
+			 * make sure the reference counting stays accurate.
+			 * If the node is empty then we will put it on the
+			 * requeue list and not touch the refcount since we
+			 * already hold it from the first time it went on the
+			 * list.
+			 */
+
+			if (!plist_node_empty(&drawctxt->pending)) {
+				plist_del(&drawctxt->pending,
+						&dispatcher->pending);
+				kgsl_context_put(&drawctxt->base);
+			}
+
+			if (ret == -EBUSY)
+				/* Inflight queue is full */
+				plist_add(&drawctxt->pending, &busy_list);
+			else
+				plist_add(&drawctxt->pending, &requeue);
+
+			spin_unlock(&dispatcher->plist_lock);
+		} else {
+			/*
+			 * If the context doesn't need to be requeued, put back the
+			 * refcount
+			 */
+
+			kgsl_context_put(&drawctxt->base);
+		}
+	}
+
+	spin_lock(&dispatcher->plist_lock);
+
+	/* Put the contexts that couldn't submit back on the pending list */
+	plist_for_each_entry_safe(drawctxt, next, &busy_list, pending) {
+		plist_del(&drawctxt->pending, &busy_list);
+		plist_add(&drawctxt->pending, &dispatcher->pending);
+	}
+
+	/* Now put the contexts that need to be requeued back on the list */
+	plist_for_each_entry_safe(drawctxt, next, &requeue, pending) {
+		plist_del(&drawctxt->pending, &requeue);
+		plist_add(&drawctxt->pending, &dispatcher->pending);
+	}
+
+	spin_unlock(&dispatcher->plist_lock);
+}
+
+/**
+ * adreno_dispatcher_issuecmds() - Issue commands from pending contexts
+ * @adreno_dev: Pointer to the adreno device struct
+ *
+ * Lock the dispatcher and call _adreno_dispatcher_issuecmds()
+ */
+static void adreno_dispatcher_issuecmds(struct adreno_device *adreno_dev)
+{
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+
+	/* If the dispatcher is busy then schedule the work for later */
+	if (!mutex_trylock(&dispatcher->mutex)) {
+		adreno_dispatcher_schedule(KGSL_DEVICE(adreno_dev));
+		return;
+	}
+
+	_adreno_dispatcher_issuecmds(adreno_dev);
+	mutex_unlock(&dispatcher->mutex);
+}
+
+/**
+ * get_timestamp() - Return the next timestamp for the context
+ * @drawctxt: Pointer to an adreno draw context struct
+ * @drawobj: Pointer to a drawobj
+ * @timestamp: Pointer to a timestamp value possibly passed from the user
+ * @user_ts: User generated timestamp
+ *
+ * Assign a timestamp based on the settings of the draw context and the command
+ * batch.
+ */
+static int get_timestamp(struct adreno_context *drawctxt,
+		struct kgsl_drawobj *drawobj, unsigned int *timestamp,
+		unsigned int user_ts)
+{
+
+	if (drawctxt->base.flags & KGSL_CONTEXT_USER_GENERATED_TS) {
+		/*
+		 * User specified timestamps need to be greater than the last
+		 * issued timestamp in the context
+		 */
+		if (timestamp_cmp(drawctxt->timestamp, user_ts) >= 0)
+			return -ERANGE;
+
+		drawctxt->timestamp = user_ts;
+	} else
+		drawctxt->timestamp++;
+
+	*timestamp = drawctxt->timestamp;
+	drawobj->timestamp = *timestamp;
+	return 0;
+}
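Timestamps are 32-bit values that wrap, so "newer than" has to be evaluated in a wraparound-aware way; timestamp_cmp() is defined in a shared KGSL header elsewhere in the driver. An illustrative comparison (not necessarily the driver's exact one) that treats the 32-bit space as circular:

#include <stdint.h>
#include <stdio.h>

/*
 * Returns <0, 0 or >0 when a is older than, equal to or newer than b,
 * treating the 32-bit timestamp space as circular. Illustrative only.
 */
static int ts_cmp(uint32_t a, uint32_t b)
{
	int32_t diff = (int32_t)(a - b);

	return (diff > 0) - (diff < 0);
}

int main(void)
{
	/* A timestamp just past the wrap point is still "newer" */
	printf("%d\n", ts_cmp(5, 0xfffffff0u));		/* > 0 */
	/* A non-increasing user timestamp is rejected by get_timestamp() */
	printf("%d\n", ts_cmp(100, 100) >= 0);		/* 1, i.e. -ERANGE */
	return 0;
}

With a comparison like this, the timestamp_cmp(drawctxt->timestamp, user_ts) >= 0 check above rejects any user-supplied timestamp that is not strictly newer than the last one issued on the context.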
+
+static void _set_ft_policy(struct adreno_device *adreno_dev,
+		struct adreno_context *drawctxt,
+		struct kgsl_drawobj_cmd *cmdobj)
+{
+	/*
+	 * Set the fault tolerance policy for the command batch - assuming the
+	 * context hasn't disabled FT use the current device policy
+	 */
+	if (drawctxt->base.flags & KGSL_CONTEXT_NO_FAULT_TOLERANCE)
+		set_bit(KGSL_FT_DISABLE, &cmdobj->fault_policy);
+	else
+		cmdobj->fault_policy = adreno_dev->ft_policy;
+}
+
+static void _cmdobj_set_flags(struct adreno_context *drawctxt,
+			struct kgsl_drawobj_cmd *cmdobj)
+{
+	/*
+	 * Force the preamble for this submission only - this is usually
+	 * requested by the dispatcher as part of fault recovery
+	 */
+	if (test_and_clear_bit(ADRENO_CONTEXT_FORCE_PREAMBLE,
+				&drawctxt->base.priv))
+		set_bit(CMDOBJ_FORCE_PREAMBLE, &cmdobj->priv);
+
+	/*
+	 * Force the preamble if set from userspace in the context or
+	 * command obj flags
+	 */
+	if ((drawctxt->base.flags & KGSL_CONTEXT_CTX_SWITCH) ||
+		(cmdobj->base.flags & KGSL_DRAWOBJ_CTX_SWITCH))
+		set_bit(CMDOBJ_FORCE_PREAMBLE, &cmdobj->priv);
+
+	/* Skip this ib if IFH_NOP is enabled */
+	if (drawctxt->base.flags & KGSL_CONTEXT_IFH_NOP)
+		set_bit(CMDOBJ_SKIP, &cmdobj->priv);
+
+	/*
+	 * If we are waiting for the end of frame and it hasn't appeared yet,
+	 * then mark the command obj as skipped.  It will still progress
+	 * through the pipeline but it won't actually send any commands
+	 */
+
+	if (test_bit(ADRENO_CONTEXT_SKIP_EOF, &drawctxt->base.priv)) {
+		set_bit(CMDOBJ_SKIP, &cmdobj->priv);
+
+		/*
+		 * If this command obj represents the EOF then clear the way
+		 * for the dispatcher to continue submitting
+		 */
+
+		if (cmdobj->base.flags & KGSL_DRAWOBJ_END_OF_FRAME) {
+			clear_bit(ADRENO_CONTEXT_SKIP_EOF,
+				  &drawctxt->base.priv);
+
+			/*
+			 * Force the preamble on the next command to ensure that
+			 * the state is correct
+			 */
+			set_bit(ADRENO_CONTEXT_FORCE_PREAMBLE,
+				&drawctxt->base.priv);
+		}
+	}
+}
+
+static inline int _check_context_state(struct kgsl_context *context)
+{
+	if (kgsl_context_invalid(context))
+		return -EDEADLK;
+
+	if (kgsl_context_detached(context))
+		return -ENOENT;
+
+	return 0;
+}
+
+static inline bool _verify_ib(struct kgsl_device_private *dev_priv,
+		struct kgsl_context *context, struct kgsl_memobj_node *ib)
+{
+	struct kgsl_device *device = dev_priv->device;
+	struct kgsl_process_private *private = dev_priv->process_priv;
+
+	/* The maximum allowable size for an IB in the CP is 0xFFFFF dwords */
+	if (ib->size == 0 || ((ib->size >> 2) > 0xFFFFF)) {
+		pr_context(device, context, "ctxt %d invalid ib size %lld\n",
+			context->id, ib->size);
+		return false;
+	}
+
+	/* Make sure that the address is mapped */
+	if (!kgsl_mmu_gpuaddr_in_range(private->pagetable, ib->gpuaddr)) {
+		pr_context(device, context, "ctxt %d invalid ib gpuaddr %llX\n",
+			context->id, ib->gpuaddr);
+		return false;
+	}
+
+	return true;
+}
+
+static inline int _verify_cmdobj(struct kgsl_device_private *dev_priv,
+		struct kgsl_context *context, struct kgsl_drawobj *drawobj[],
+		uint32_t count)
+{
+	struct kgsl_device *device = dev_priv->device;
+	struct kgsl_memobj_node *ib;
+	unsigned int i;
+
+	for (i = 0; i < count; i++) {
+		/* Verify the IBs before they get queued */
+		if (drawobj[i]->type == CMDOBJ_TYPE) {
+			struct kgsl_drawobj_cmd *cmdobj = CMDOBJ(drawobj[i]);
+
+			list_for_each_entry(ib, &cmdobj->cmdlist, node)
+				if (_verify_ib(dev_priv,
+					&ADRENO_CONTEXT(context)->base, ib)
+					== false)
+					return -EINVAL;
+			/*
+			 * Clear the wake on touch bit to indicate an IB has
+			 * been submitted since the last time we set it.
+			 * But only clear it when we have rendering commands.
+			 */
+			device->flags &= ~KGSL_FLAG_WAKE_ON_TOUCH;
+		}
+
+		/* A3XX does not have support for drawobj profiling */
+		if (adreno_is_a3xx(ADRENO_DEVICE(device)) &&
+			(drawobj[i]->flags & KGSL_DRAWOBJ_PROFILING))
+			return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static inline int _wait_for_room_in_context_queue(
+	struct adreno_context *drawctxt)
+{
+	int ret = 0;
+
+	/* Wait for room in the context queue */
+	while (drawctxt->queued >= _context_drawqueue_size) {
+		trace_adreno_drawctxt_sleep(drawctxt);
+		spin_unlock(&drawctxt->lock);
+
+		ret = wait_event_interruptible_timeout(drawctxt->wq,
+			_check_context_queue(drawctxt),
+			msecs_to_jiffies(_context_queue_wait));
+
+		spin_lock(&drawctxt->lock);
+		trace_adreno_drawctxt_wake(drawctxt);
+
+		if (ret <= 0)
+			return (ret == 0) ? -ETIMEDOUT : (int) ret;
+	}
+
+	return 0;
+}
+
+static int _check_context_state_to_queue_cmds(
+	struct adreno_context *drawctxt)
+{
+	int ret = _check_context_state(&drawctxt->base);
+
+	if (ret)
+		return ret;
+
+	ret = _wait_for_room_in_context_queue(drawctxt);
+	if (ret)
+		return ret;
+
+	/*
+	 * Account for the possibility that the context got invalidated
+	 * while we were sleeping
+	 */
+	return _check_context_state(&drawctxt->base);
+}
+
+static void _queue_drawobj(struct adreno_context *drawctxt,
+	struct kgsl_drawobj *drawobj)
+{
+	/* Put the command into the queue */
+	drawctxt->drawqueue[drawctxt->drawqueue_tail] = drawobj;
+	drawctxt->drawqueue_tail = (drawctxt->drawqueue_tail + 1) %
+			ADRENO_CONTEXT_DRAWQUEUE_SIZE;
+	drawctxt->queued++;
+	trace_adreno_cmdbatch_queued(drawobj, drawctxt->queued);
+}
+
+static int _queue_markerobj(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt, struct kgsl_drawobj_cmd *markerobj,
+	uint32_t *timestamp, unsigned int user_ts)
+{
+	struct kgsl_drawobj *drawobj = DRAWOBJ(markerobj);
+	int ret;
+
+	ret = get_timestamp(drawctxt, drawobj, timestamp, user_ts);
+	if (ret)
+		return ret;
+
+	/*
+	 * See if we can fastpath this thing - if nothing is queued
+	 * and nothing is inflight retire without bothering the GPU
+	 */
+	if (!drawctxt->queued && kgsl_check_timestamp(drawobj->device,
+			drawobj->context, drawctxt->queued_timestamp)) {
+		trace_adreno_cmdbatch_queued(drawobj, drawctxt->queued);
+		_retire_timestamp(drawobj);
+		return 1;
+	}
+
+	/*
+	 * Remember the last queued timestamp - the marker will block
+	 * until that timestamp is expired (unless another command
+	 * comes along and forces the marker to execute)
+	 */
+
+	markerobj->marker_timestamp = drawctxt->queued_timestamp;
+	drawctxt->queued_timestamp = *timestamp;
+	_set_ft_policy(adreno_dev, drawctxt, markerobj);
+	_cmdobj_set_flags(drawctxt, markerobj);
+
+	_queue_drawobj(drawctxt, drawobj);
+
+	return 0;
+}
+
+static int _queue_cmdobj(struct adreno_device *adreno_dev,
+	struct adreno_context *drawctxt, struct kgsl_drawobj_cmd *cmdobj,
+	uint32_t *timestamp, unsigned int user_ts)
+{
+	struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+	unsigned int j;
+	int ret;
+
+	ret = get_timestamp(drawctxt, drawobj, timestamp, user_ts);
+	if (ret)
+		return ret;
+
+	/*
+	 * If this is a real command then we need to force any markers
+	 * queued before it to dispatch to keep time linear - set the
+	 * skip bit so the commands get NOPed.
+	 */
+	j = drawctxt->drawqueue_head;
+
+	while (j != drawctxt->drawqueue_tail) {
+		if (drawctxt->drawqueue[j]->type == MARKEROBJ_TYPE) {
+			struct kgsl_drawobj_cmd *markerobj =
+				CMDOBJ(drawctxt->drawqueue[j]);
+			set_bit(CMDOBJ_SKIP, &markerobj->priv);
+		}
+
+		j = DRAWQUEUE_NEXT(j, ADRENO_CONTEXT_DRAWQUEUE_SIZE);
+	}
+
+	drawctxt->queued_timestamp = *timestamp;
+	_set_ft_policy(adreno_dev, drawctxt, cmdobj);
+	_cmdobj_set_flags(drawctxt, cmdobj);
+
+	_queue_drawobj(drawctxt, drawobj);
+
+	return 0;
+}
+
+static void _queue_syncobj(struct adreno_context *drawctxt,
+	struct kgsl_drawobj_sync *syncobj, uint32_t *timestamp)
+{
+	struct kgsl_drawobj *drawobj = DRAWOBJ(syncobj);
+
+	*timestamp = 0;
+	drawobj->timestamp = 0;
+
+	_queue_drawobj(drawctxt, drawobj);
+}
+
+/**
+ * adreno_dispatcher_queue_cmds() - Queue new draw objects on the context
+ * @dev_priv: Pointer to the device private struct
+ * @context: Pointer to the kgsl draw context
+ * @drawobj: Pointer to the array of drawobj's being submitted
+ * @count: Number of drawobj's being submitted
+ * @timestamp: Pointer to the requested timestamp
+ *
+ * Queue a command in the context - if there isn't any room in the queue, then
+ * block until there is
+ */
+int adreno_dispatcher_queue_cmds(struct kgsl_device_private *dev_priv,
+		struct kgsl_context *context, struct kgsl_drawobj *drawobj[],
+		uint32_t count, uint32_t *timestamp)
+{
+	struct kgsl_device *device = dev_priv->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(context);
+	struct adreno_dispatcher_drawqueue *dispatch_q;
+	int ret;
+	unsigned int i, user_ts;
+
+	ret = _check_context_state(&drawctxt->base);
+	if (ret)
+		return ret;
+
+	ret = _verify_cmdobj(dev_priv, context, drawobj, count);
+	if (ret)
+		return ret;
+
+	/* wait for the suspend gate */
+	wait_for_completion(&device->halt_gate);
+
+	spin_lock(&drawctxt->lock);
+
+	ret = _check_context_state_to_queue_cmds(drawctxt);
+	if (ret) {
+		spin_unlock(&drawctxt->lock);
+		return ret;
+	}
+
+	user_ts = *timestamp;
+
+	for (i = 0; i < count; i++) {
+
+		switch (drawobj[i]->type) {
+		case MARKEROBJ_TYPE:
+			ret = _queue_markerobj(adreno_dev, drawctxt,
+					CMDOBJ(drawobj[i]),
+					timestamp, user_ts);
+			if (ret == 1) {
+				spin_unlock(&drawctxt->lock);
+				goto done;
+			} else if (ret) {
+				spin_unlock(&drawctxt->lock);
+				return ret;
+			}
+			break;
+		case CMDOBJ_TYPE:
+			ret = _queue_cmdobj(adreno_dev, drawctxt,
+						CMDOBJ(drawobj[i]),
+						timestamp, user_ts);
+			if (ret) {
+				spin_unlock(&drawctxt->lock);
+				return ret;
+			}
+			break;
+		case SYNCOBJ_TYPE:
+			_queue_syncobj(drawctxt, SYNCOBJ(drawobj[i]),
+						timestamp);
+			break;
+		default:
+			spin_unlock(&drawctxt->lock);
+			return -EINVAL;
+		}
+
+	}
+
+	dispatch_q = ADRENO_DRAWOBJ_DISPATCH_DRAWQUEUE(drawobj[0]);
+
+	_track_context(adreno_dev, dispatch_q, drawctxt);
+
+	spin_unlock(&drawctxt->lock);
+
+	kgsl_pwrctrl_update_l2pc(&adreno_dev->dev);
+
+	/* Add the context to the dispatcher pending list */
+	dispatcher_queue_context(adreno_dev, drawctxt);
+
+	/*
+	 * Only issue commands if inflight is less than burst - this prevents
+	 * us from sitting around waiting for the mutex on a busy system - the
+	 * work loop will schedule it for us. Inflight is mutex protected but
+	 * the worst that can happen is that it will go to 0 after we check and
+	 * if it goes to 0 it is because the work loop decremented it and the
+	 * work queue will try to schedule new commands anyway.
+	 */
+
+	if (dispatch_q->inflight < _context_drawobj_burst)
+		adreno_dispatcher_issuecmds(adreno_dev);
+done:
+	if (test_and_clear_bit(ADRENO_CONTEXT_FAULT, &context->priv))
+		return -EPROTO;
+
+	return 0;
+}
+
+static int _mark_context(int id, void *ptr, void *data)
+{
+	unsigned int guilty = *((unsigned int *) data);
+	struct kgsl_context *context = ptr;
+
+	/*
+	 * If the context is guilty mark it as such.  Otherwise mark it as
+	 * innocent if it had not already been marked as guilty.  If id is
+	 * passed as 0 then mark EVERYBODY guilty (recovery failed)
+	 */
+
+	if (guilty == 0 || guilty == context->id)
+		context->reset_status =
+			KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT;
+	else if (context->reset_status !=
+		KGSL_CTX_STAT_GUILTY_CONTEXT_RESET_EXT)
+		context->reset_status =
+			KGSL_CTX_STAT_INNOCENT_CONTEXT_RESET_EXT;
+
+	return 0;
+}
+
+/**
+ * mark_guilty_context() - Mark the given context as guilty (failed recovery)
+ * @device: Pointer to a KGSL device structure
+ * @id: Context ID of the guilty context (or 0 to mark all as guilty)
+ *
+ * Mark the given (or all) context(s) as guilty (failed recovery)
+ */
+static void mark_guilty_context(struct kgsl_device *device, unsigned int id)
+{
+	/* Mark the status for all the contexts in the device */
+
+	read_lock(&device->context_lock);
+	idr_for_each(&device->context_idr, _mark_context, &id);
+	read_unlock(&device->context_lock);
+}
+
+/*
+ * If an IB inside of the drawobj has a gpuaddr that matches the base
+ * passed in then mark it with MEMOBJ_SKIP, which effectively skips it when it
+ * is submitted in the ringbuffer.
+ */
+static void _skip_ib(struct kgsl_drawobj_cmd *cmdobj, uint64_t base)
+{
+	struct kgsl_memobj_node *ib;
+
+	list_for_each_entry(ib, &cmdobj->cmdlist, node) {
+		if (ib->gpuaddr == base) {
+			ib->priv |= MEMOBJ_SKIP;
+			if (base)
+				return;
+		}
+	}
+}
+
+static void _skip_cmd(struct kgsl_drawobj_cmd *cmdobj,
+	struct kgsl_drawobj_cmd **replay, int count)
+{
+	struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context);
+	int i;
+
+	/*
+	 * SKIPCMD policy: next IB issued for this context is tentative
+	 * if it fails we assume that GFT failed and if it succeeds
+	 * we mark GFT as a success.
+	 *
+	 * Find next commandbatch for the faulting context
+	 * If commandbatch is found
+	 * a) store the current commandbatch fault_policy in context's next
+	 *    commandbatch fault_policy
+	 * b) force preamble for next commandbatch
+	 */
+	for (i = 1; i < count; i++) {
+		if (DRAWOBJ(replay[i])->context->id == drawobj->context->id) {
+			replay[i]->fault_policy = replay[0]->fault_policy;
+			set_bit(CMDOBJ_FORCE_PREAMBLE, &replay[i]->priv);
+			set_bit(KGSL_FT_SKIPCMD, &replay[i]->fault_recovery);
+			break;
+		}
+	}
+
+	/*
+	 * If we did not find the next cmd then
+	 * a) set a flag for next command issued in this context
+	 * b) store the fault_policy, this fault_policy becomes the policy of
+	 *    next command issued in this context
+	 */
+	if ((i == count) && drawctxt) {
+		set_bit(ADRENO_CONTEXT_SKIP_CMD, &drawctxt->base.priv);
+		drawctxt->fault_policy = replay[0]->fault_policy;
+	}
+
+	/* set the flags to skip this cmdobj */
+	set_bit(CMDOBJ_SKIP, &cmdobj->priv);
+	cmdobj->fault_recovery = 0;
+}
+
+static void _skip_frame(struct kgsl_drawobj_cmd *cmdobj,
+	struct kgsl_drawobj_cmd **replay, int count)
+{
+	struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context);
+	int skip = 1;
+	int i;
+
+	for (i = 0; i < count; i++) {
+
+		struct kgsl_drawobj *replay_obj = DRAWOBJ(replay[i]);
+
+		/*
+		 * Only operate on drawobj's that belong to the
+		 * faulting context
+		 */
+
+		if (replay_obj->context->id != drawobj->context->id)
+			continue;
+
+		/*
+		 * Skip all the drawobjs in this context until
+		 * the EOF flag is seen.  If the EOF flag is seen then
+		 * force the preamble for the next command.
+		 */
+
+		if (skip) {
+			set_bit(CMDOBJ_SKIP, &replay[i]->priv);
+
+			if (replay_obj->flags & KGSL_DRAWOBJ_END_OF_FRAME)
+				skip = 0;
+		} else {
+			set_bit(CMDOBJ_FORCE_PREAMBLE, &replay[i]->priv);
+			return;
+		}
+	}
+
+	/*
+	 * If the EOF flag hasn't been seen yet then set the flag in the
+	 * drawctxt to keep looking for it
+	 */
+
+	if (skip && drawctxt)
+		set_bit(ADRENO_CONTEXT_SKIP_EOF, &drawctxt->base.priv);
+
+	/*
+	 * If we did see the EOF flag then force the preamble on for the
+	 * next command issued on this context
+	 */
+
+	if (!skip && drawctxt)
+		set_bit(ADRENO_CONTEXT_FORCE_PREAMBLE, &drawctxt->base.priv);
+}
+
+static void remove_invalidated_cmdobjs(struct kgsl_device *device,
+		struct kgsl_drawobj_cmd **replay, int count)
+{
+	int i;
+
+	for (i = 0; i < count; i++) {
+		struct kgsl_drawobj_cmd *cmdobj = replay[i];
+		struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+
+		if (cmdobj == NULL)
+			continue;
+
+		if (kgsl_context_detached(drawobj->context) ||
+			kgsl_context_invalid(drawobj->context)) {
+			replay[i] = NULL;
+
+			mutex_lock(&device->mutex);
+			kgsl_cancel_events_timestamp(device,
+				&drawobj->context->events, drawobj->timestamp);
+			mutex_unlock(&device->mutex);
+
+			kgsl_drawobj_destroy(drawobj);
+		}
+	}
+}
+
+static char _pidname[TASK_COMM_LEN];
+
+static inline const char *_kgsl_context_comm(struct kgsl_context *context)
+{
+	if (context && context->proc_priv)
+		strlcpy(_pidname, context->proc_priv->comm, sizeof(_pidname));
+	else
+		snprintf(_pidname, TASK_COMM_LEN, "unknown");
+
+	return _pidname;
+}
+
+#define pr_fault(_d, _c, fmt, args...) \
+		dev_err((_d)->dev, "%s[%d]: " fmt, \
+		_kgsl_context_comm((_c)->context), \
+		(_c)->context->proc_priv->pid, ##args)
+
+static void adreno_fault_header(struct kgsl_device *device,
+		struct adreno_ringbuffer *rb, struct kgsl_drawobj_cmd *cmdobj)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+	unsigned int status, rptr, wptr, ib1sz, ib2sz;
+	uint64_t ib1base, ib2base;
+
+	adreno_readreg(adreno_dev, ADRENO_REG_RBBM_STATUS, &status);
+	adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_RPTR, &rptr);
+	adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_WPTR, &wptr);
+	adreno_readreg64(adreno_dev, ADRENO_REG_CP_IB1_BASE,
+					  ADRENO_REG_CP_IB1_BASE_HI, &ib1base);
+	adreno_readreg(adreno_dev, ADRENO_REG_CP_IB1_BUFSZ, &ib1sz);
+	adreno_readreg64(adreno_dev, ADRENO_REG_CP_IB2_BASE,
+					   ADRENO_REG_CP_IB2_BASE_HI, &ib2base);
+	adreno_readreg(adreno_dev, ADRENO_REG_CP_IB2_BUFSZ, &ib2sz);
+
+	if (drawobj != NULL) {
+		struct adreno_context *drawctxt =
+			ADRENO_CONTEXT(drawobj->context);
+
+		trace_adreno_gpu_fault(drawobj->context->id,
+			drawobj->timestamp,
+			status, rptr, wptr, ib1base, ib1sz,
+			ib2base, ib2sz, drawctxt->rb->id);
+
+		pr_fault(device, drawobj,
+			"gpu fault ctx %d ts %d status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
+			drawobj->context->id, drawobj->timestamp, status,
+			rptr, wptr, ib1base, ib1sz, ib2base, ib2sz);
+
+		if (rb != NULL)
+			pr_fault(device, drawobj,
+				"gpu fault rb %d rb sw r/w %4.4x/%4.4x\n",
+				rb->id, rptr, rb->wptr);
+	} else {
+		int id = (rb != NULL) ? rb->id : -1;
+
+		dev_err(device->dev,
+			"RB[%d]: gpu fault status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x\n",
+			id, status, rptr, wptr, ib1base, ib1sz, ib2base,
+			ib2sz);
+		if (rb != NULL)
+			dev_err(device->dev,
+				"RB[%d] gpu fault rb sw r/w %4.4x/%4.4x\n",
+				rb->id, rptr, rb->wptr);
+	}
+}
+
+void adreno_fault_skipcmd_detached(struct adreno_device *adreno_dev,
+				 struct adreno_context *drawctxt,
+				 struct kgsl_drawobj *drawobj)
+{
+	if (test_bit(ADRENO_CONTEXT_SKIP_CMD, &drawctxt->base.priv) &&
+			kgsl_context_detached(&drawctxt->base)) {
+		pr_context(KGSL_DEVICE(adreno_dev), drawobj->context,
+			"gpu detached context %d\n", drawobj->context->id);
+		clear_bit(ADRENO_CONTEXT_SKIP_CMD, &drawctxt->base.priv);
+	}
+}
+
+/**
+ * process_cmdobj_fault() - Process a cmdobj for fault policies
+ * @device: Device on which the cmdobj caused a fault
+ * @replay: List of cmdobj's that are to be replayed on the device. The
+ * first command in the replay list is the faulting command and the remaining
+ * cmdobj's in the list are commands that were submitted to the same queue
+ * as the faulting one.
+ * @count: Number of cmdobj's in replay
+ * @base: The IB1 base at the time of fault
+ * @fault: The fault type
+ */
+static void process_cmdobj_fault(struct kgsl_device *device,
+		struct kgsl_drawobj_cmd **replay, int count,
+		unsigned int base,
+		int fault)
+{
+	struct kgsl_drawobj_cmd *cmdobj = replay[0];
+	struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+	int i;
+	char *state = "failed";
+
+	/*
+	 * If GFT recovered more than X times in Y ms invalidate the context
+	 * and do not attempt recovery.
+	 * Example: with X==3 and Y==3000 ms, if the GPU hung at 500ms, 1700ms,
+	 * 2500ms and 3000ms for the same context, we will not try FT and will
+	 * invalidate the context at 3000ms because the context triggered GFT
+	 * more than 3 times in the last 3 seconds. If a context causes
+	 * recoverable GPU hangs but the 1st and 4th hangs are more than 3
+	 * seconds apart, we will not disable GFT or invalidate the context.
+	 */
+	if (test_bit(KGSL_FT_THROTTLE, &cmdobj->fault_policy)) {
+		if (time_after(jiffies, (drawobj->context->fault_time
+				+ msecs_to_jiffies(_fault_throttle_time)))) {
+			drawobj->context->fault_time = jiffies;
+			drawobj->context->fault_count = 1;
+		} else {
+			drawobj->context->fault_count++;
+			if (drawobj->context->fault_count >
+					_fault_throttle_burst) {
+				set_bit(KGSL_FT_DISABLE,
+						&cmdobj->fault_policy);
+				pr_context(device, drawobj->context,
+					 "gpu fault threshold exceeded %d faults in %d msecs\n",
+					 _fault_throttle_burst,
+					 _fault_throttle_time);
+			}
+		}
+	}
+
+	/*
+	 * If FT is disabled for this cmdobj invalidate immediately
+	 */
+
+	if (test_bit(KGSL_FT_DISABLE, &cmdobj->fault_policy) ||
+		test_bit(KGSL_FT_TEMP_DISABLE, &cmdobj->fault_policy)) {
+		state = "skipped";
+		bitmap_zero(&cmdobj->fault_policy, BITS_PER_LONG);
+	}
+
+	/* If the context is detached do not run FT on context */
+	if (kgsl_context_detached(drawobj->context)) {
+		state = "detached";
+		bitmap_zero(&cmdobj->fault_policy, BITS_PER_LONG);
+	}
+
+	/*
+	 * Set a flag so we don't print another PM dump if the cmdobj fails
+	 * again on replay
+	 */
+
+	set_bit(KGSL_FT_SKIP_PMDUMP, &cmdobj->fault_policy);
+
+	/*
+	 * A hardware fault generally means something was deterministically
+	 * wrong with the cmdobj - no point in trying to replay it.
+	 * Clear the replay bit and move on to the next policy level.
+	 */
+
+	if (fault & ADRENO_HARD_FAULT)
+		clear_bit(KGSL_FT_REPLAY, &(cmdobj->fault_policy));
+
+	/*
+	 * A timeout fault means the IB timed out - clear the policy and
+	 * invalidate - this will clear the FT_SKIP_PMDUMP bit but that is okay
+	 * because we won't see this cmdobj again
+	 */
+
+	if (fault & ADRENO_TIMEOUT_FAULT)
+		bitmap_zero(&cmdobj->fault_policy, BITS_PER_LONG);
+
+	/*
+	 * If the context had a GPU page fault then it is likely it would fault
+	 * again if replayed
+	 */
+
+	if (test_bit(KGSL_CONTEXT_PRIV_PAGEFAULT,
+		     &drawobj->context->priv)) {
+		/* we'll need to resume the mmu later... */
+		clear_bit(KGSL_FT_REPLAY, &cmdobj->fault_policy);
+		clear_bit(KGSL_CONTEXT_PRIV_PAGEFAULT,
+			  &drawobj->context->priv);
+	}
+
+	/*
+	 * Execute the fault tolerance policy. Each cmdobj stores the
+	 * current fault policy that was set when it was queued.
+	 * As the options are tried in descending priority
+	 * (REPLAY -> SKIPIB -> SKIPCMD -> SKIPFRAME -> NOTHING) the bits are
+	 * cleared from the cmdobj policy so the next option can be tried if
+	 * the cmdobj faults again
+	 */
+
+	/* Replay the hanging cmdobj again */
+	if (test_and_clear_bit(KGSL_FT_REPLAY, &cmdobj->fault_policy)) {
+		trace_adreno_cmdbatch_recovery(cmdobj, BIT(KGSL_FT_REPLAY));
+		set_bit(KGSL_FT_REPLAY, &cmdobj->fault_recovery);
+		return;
+	}
+
+	/*
+	 * Skip the last IB1 that was played but replay everything else.
+	 * Note that the last IB1 might not be in the "hung" cmdobj
+	 * because the CP may have caused a page-fault while it was prefetching
+	 * the next IB1/IB2. Walk all outstanding commands and zap the
+	 * supposedly bad IB1 wherever it lurks.
+	 */
+
+	if (test_and_clear_bit(KGSL_FT_SKIPIB, &cmdobj->fault_policy)) {
+		trace_adreno_cmdbatch_recovery(cmdobj, BIT(KGSL_FT_SKIPIB));
+		set_bit(KGSL_FT_SKIPIB, &cmdobj->fault_recovery);
+
+		for (i = 0; i < count; i++) {
+			if (replay[i] != NULL &&
+				DRAWOBJ(replay[i])->context->id ==
+					drawobj->context->id)
+				_skip_ib(replay[i], base);
+		}
+
+		return;
+	}
+
+	/* Skip the faulted cmdobj submission */
+	if (test_and_clear_bit(KGSL_FT_SKIPCMD, &cmdobj->fault_policy)) {
+		trace_adreno_cmdbatch_recovery(cmdobj, BIT(KGSL_FT_SKIPCMD));
+
+		/* Skip faulting cmdobj */
+		_skip_cmd(cmdobj, replay, count);
+
+		return;
+	}
+
+	if (test_and_clear_bit(KGSL_FT_SKIPFRAME, &cmdobj->fault_policy)) {
+		trace_adreno_cmdbatch_recovery(cmdobj,
+			BIT(KGSL_FT_SKIPFRAME));
+		set_bit(KGSL_FT_SKIPFRAME, &cmdobj->fault_recovery);
+
+		/*
+		 * Skip all the pending cmdobj's for this context until
+		 * the EOF frame is seen
+		 */
+		_skip_frame(cmdobj, replay, count);
+		return;
+	}
+
+	/* If we get here then all the policies failed */
+
+	pr_context(device, drawobj->context, "gpu %s ctx %d ts %d\n",
+		state, drawobj->context->id, drawobj->timestamp);
+
+	/* Mark the context as failed */
+	mark_guilty_context(device, drawobj->context->id);
+
+	/* Invalidate the context */
+	adreno_drawctxt_invalidate(device, drawobj->context);
+}
+
+/**
+ * recover_dispatch_q() - Recover all commands in a dispatch queue by
+ * resubmitting the commands
+ * @device: Device on which recovery is performed
+ * @dispatch_q: The command queue to recover
+ * @fault: Faults caused by the command in the dispatch q
+ * @base: The IB1 base during the fault
+ */
+static void recover_dispatch_q(struct kgsl_device *device,
+		struct adreno_dispatcher_drawqueue *dispatch_q,
+		int fault,
+		unsigned int base)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_drawobj_cmd **replay;
+	unsigned int ptr;
+	int first = 0;
+	int count = 0;
+	int i;
+
+	/* Allocate memory to store the inflight commands */
+	replay = kcalloc(dispatch_q->inflight, sizeof(*replay), GFP_KERNEL);
+
+	if (replay == NULL) {
+		unsigned int ptr = dispatch_q->head;
+
+		/* Recovery failed - mark everybody on this q guilty */
+		while (ptr != dispatch_q->tail) {
+			struct kgsl_drawobj_cmd *cmdobj =
+						dispatch_q->cmd_q[ptr];
+			struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+
+			mark_guilty_context(device, drawobj->context->id);
+			adreno_drawctxt_invalidate(device, drawobj->context);
+			kgsl_drawobj_destroy(drawobj);
+
+			ptr = DRAWQUEUE_NEXT(ptr,
+				ADRENO_DISPATCH_DRAWQUEUE_SIZE);
+		}
+
+		/*
+		 * Set the replay count to zero - this will ensure that the
+		 * hardware gets reset but nothing else gets played
+		 */
+
+		count = 0;
+		goto replay;
+	}
+
+	/* Copy the inflight cmdobj's into the temporary storage */
+	ptr = dispatch_q->head;
+
+	while (ptr != dispatch_q->tail) {
+		replay[count++] = dispatch_q->cmd_q[ptr];
+		ptr = DRAWQUEUE_NEXT(ptr, ADRENO_DISPATCH_DRAWQUEUE_SIZE);
+	}
+
+	if (fault && count)
+		process_cmdobj_fault(device, replay,
+					count, base, fault);
+replay:
+	dispatch_q->inflight = 0;
+	dispatch_q->head = dispatch_q->tail = 0;
+	/* Remove any pending cmdobj's that have been invalidated */
+	remove_invalidated_cmdobjs(device, replay, count);
+
+	/* Replay the pending command buffers */
+	for (i = 0; i < count; i++) {
+
+		int ret;
+
+		if (replay[i] == NULL)
+			continue;
+
+		/*
+		 * Force the preamble on the first command (if applicable) to
+		 * avoid any strange stage issues
+		 */
+
+		if (first == 0) {
+			set_bit(CMDOBJ_FORCE_PREAMBLE, &replay[i]->priv);
+			first = 1;
+		}
+
+		/*
+		 * Force each cmdobj to wait for idle - this avoids weird
+		 * CP parse issues
+		 */
+
+		set_bit(CMDOBJ_WFI, &replay[i]->priv);
+
+		ret = sendcmd(adreno_dev, replay[i]);
+
+		/*
+		 * If sending the command fails, then try to recover by
+		 * invalidating the context
+		 */
+
+		if (ret) {
+			pr_context(device, replay[i]->base.context,
+				"gpu reset failed ctx %d ts %d\n",
+				replay[i]->base.context->id,
+				replay[i]->base.timestamp);
+
+			/* Mark this context as guilty (failed recovery) */
+			mark_guilty_context(device,
+				replay[i]->base.context->id);
+
+			adreno_drawctxt_invalidate(device,
+				replay[i]->base.context);
+			remove_invalidated_cmdobjs(device, &replay[i],
+				count - i);
+		}
+	}
+
+	/* Clear the fault bit */
+	clear_bit(ADRENO_DEVICE_FAULT, &adreno_dev->priv);
+
+	kfree(replay);
+}
+
+static void do_header_and_snapshot(struct kgsl_device *device,
+		struct adreno_ringbuffer *rb, struct kgsl_drawobj_cmd *cmdobj)
+{
+	struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+
+	/* Always dump the snapshot on a non-drawobj failure */
+	if (cmdobj == NULL) {
+		adreno_fault_header(device, rb, NULL);
+		kgsl_device_snapshot(device, NULL);
+		return;
+	}
+
+	/* Skip everything if the PMDUMP flag is set */
+	if (test_bit(KGSL_FT_SKIP_PMDUMP, &cmdobj->fault_policy))
+		return;
+
+	/* Print the fault header */
+	adreno_fault_header(device, rb, cmdobj);
+
+	if (!(drawobj->context->flags & KGSL_CONTEXT_NO_SNAPSHOT))
+		kgsl_device_snapshot(device, drawobj->context);
+}
+
+static int dispatcher_do_fault(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+	struct adreno_dispatcher_drawqueue *dispatch_q = NULL, *dispatch_q_temp;
+	struct adreno_ringbuffer *rb;
+	struct adreno_ringbuffer *hung_rb = NULL;
+	unsigned int reg;
+	uint64_t base;
+	struct kgsl_drawobj_cmd *cmdobj = NULL;
+	int ret, i;
+	int fault;
+	int halt;
+
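+	/* Read and clear the fault atomically so it is handled only once */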
+	fault = atomic_xchg(&dispatcher->fault, 0);
+	if (fault == 0)
+		return 0;
+
+	/*
+	 * On A5xx, read RBBM_STATUS3:SMMU_STALLED_ON_FAULT (BIT 24) to
+	 * tell if this function was entered after a pagefault. If so, only
+	 * proceed if the fault handler has already run in the IRQ thread,
+	 * else return early to give the fault handler a chance to run.
+	 */
+	if (!(fault & ADRENO_IOMMU_PAGE_FAULT) && adreno_is_a5xx(adreno_dev)) {
+		unsigned int val;
+
+		mutex_lock(&device->mutex);
+		adreno_readreg(adreno_dev, ADRENO_REG_RBBM_STATUS3, &val);
+		mutex_unlock(&device->mutex);
+		if (val & BIT(24))
+			return 0;
+	}
+
+	/* Turn off all the timers */
+	del_timer_sync(&dispatcher->timer);
+	del_timer_sync(&dispatcher->fault_timer);
+	del_timer_sync(&adreno_dev->preempt.timer);
+
+	mutex_lock(&device->mutex);
+
+	adreno_readreg64(adreno_dev, ADRENO_REG_CP_RB_BASE,
+		ADRENO_REG_CP_RB_BASE_HI, &base);
+
+	/*
+	 * Force the CP off for anything but a hard fault to make sure it is
+	 * good and stopped
+	 */
+	if (!(fault & ADRENO_HARD_FAULT)) {
+		adreno_readreg(adreno_dev, ADRENO_REG_CP_ME_CNTL, &reg);
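+		/* The CP halt bits differ between A5xx and earlier targets */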
+		if (adreno_is_a5xx(adreno_dev))
+			reg |= 1 | (1 << 1);
+		else
+			reg |= (1 << 27) | (1 << 28);
+		adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_CNTL, reg);
+	}
+	/*
+	 * retire cmdobj's from all the dispatch_q's before starting recovery
+	 */
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+		adreno_dispatch_retire_drawqueue(adreno_dev,
+			&(rb->dispatch_q));
+		/* Select the active dispatch_q */
+		if (base == rb->buffer_desc.gpuaddr) {
+			dispatch_q = &(rb->dispatch_q);
+			hung_rb = rb;
+			if (adreno_dev->cur_rb != hung_rb) {
+				adreno_dev->prev_rb = adreno_dev->cur_rb;
+				adreno_dev->cur_rb = hung_rb;
+			}
+		}
+		if (ADRENO_DISPATCHER_RB_STARVE_TIMER_ELAPSED ==
+			rb->starve_timer_state) {
+			adreno_put_gpu_halt(adreno_dev);
+			rb->starve_timer_state =
+			ADRENO_DISPATCHER_RB_STARVE_TIMER_UNINIT;
+		}
+	}
+
+	if (dispatch_q && !adreno_drawqueue_is_empty(dispatch_q)) {
+		cmdobj = dispatch_q->cmd_q[dispatch_q->head];
+		trace_adreno_cmdbatch_fault(cmdobj, fault);
+	}
+
+	adreno_readreg64(adreno_dev, ADRENO_REG_CP_IB1_BASE,
+		ADRENO_REG_CP_IB1_BASE_HI, &base);
+
+	do_header_and_snapshot(device, hung_rb, cmdobj);
+
+	/* Terminate the stalled transaction and resume the IOMMU */
+	if (fault & ADRENO_IOMMU_PAGE_FAULT)
+		kgsl_mmu_pagefault_resume(&device->mmu);
+
+	/* Reset the dispatcher queue */
+	dispatcher->inflight = 0;
+
+	/* Reset the GPU and make sure halt is not set during recovery */
+	halt = adreno_gpu_halt(adreno_dev);
+	adreno_clear_gpu_halt(adreno_dev);
+
+	/*
+	 * If there is a stall in the ringbuffer after all commands have been
+	 * retired then we could hit problems if contexts are waiting for
+	 * internal timestamps that will never retire
+	 */
+
+	if (hung_rb != NULL) {
+		kgsl_sharedmem_writel(device, &device->memstore,
+				MEMSTORE_RB_OFFSET(hung_rb, soptimestamp),
+				hung_rb->timestamp);
+
+		kgsl_sharedmem_writel(device, &device->memstore,
+				MEMSTORE_RB_OFFSET(hung_rb, eoptimestamp),
+				hung_rb->timestamp);
+
+		/* Schedule any pending events to be run */
+		kgsl_process_event_group(device, &hung_rb->events);
+	}
+
+	ret = adreno_reset(device, fault);
+	mutex_unlock(&device->mutex);
+	/* Ignore any other fault that came in while the reset was running */
+	atomic_set(&dispatcher->fault, 0);
+
+	/* If adreno_reset() fails then what hope do we have for the future? */
+	BUG_ON(ret);
+
+	/* recover all the dispatch_q's starting with the one that hung */
+	if (dispatch_q)
+		recover_dispatch_q(device, dispatch_q, fault, base);
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+		dispatch_q_temp = &(rb->dispatch_q);
+		if (dispatch_q_temp != dispatch_q)
+			recover_dispatch_q(device, dispatch_q_temp, 0, base);
+	}
+
+	atomic_add(halt, &adreno_dev->halt);
+
+	return 1;
+}
+
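+/*
+ * Return true if the drawobj timestamp is at or past the consumed timestamp
+ * but has not yet been retired.
+ */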
+static inline int drawobj_consumed(struct kgsl_drawobj *drawobj,
+		unsigned int consumed, unsigned int retired)
+{
+	return ((timestamp_cmp(drawobj->timestamp, consumed) >= 0) &&
+		(timestamp_cmp(retired, drawobj->timestamp) < 0));
+}
+
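+/*
+ * Log which fault tolerance policy was used to recover a cmdobj, based on
+ * the first bit set in its fault_recovery mask.
+ */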
+static void _print_recovery(struct kgsl_device *device,
+		struct kgsl_drawobj_cmd *cmdobj)
+{
+	static struct {
+		unsigned int mask;
+		const char *str;
+	} flags[] = { ADRENO_FT_TYPES };
+
+	int i, nr = find_first_bit(&cmdobj->fault_recovery, BITS_PER_LONG);
+	char *result = "unknown";
+	struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+
+	for (i = 0; i < ARRAY_SIZE(flags); i++) {
+		if (flags[i].mask == BIT(nr)) {
+			result = (char *) flags[i].str;
+			break;
+		}
+	}
+
+	pr_context(device, drawobj->context,
+		"gpu %s ctx %d ts %d policy %lX\n",
+		result, drawobj->context->id, drawobj->timestamp,
+		cmdobj->fault_recovery);
+}
+
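+/*
+ * Read back the start and retire ticks that the GPU wrote into the
+ * profiling buffer entry for this cmdobj.
+ */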
+static void cmdobj_profile_ticks(struct adreno_device *adreno_dev,
+	struct kgsl_drawobj_cmd *cmdobj, uint64_t *start, uint64_t *retire)
+{
+	void *ptr = adreno_dev->profile_buffer.hostptr;
+	struct adreno_drawobj_profile_entry *entry;
+
+	entry = (struct adreno_drawobj_profile_entry *)
+		(ptr + (cmdobj->profile_index * sizeof(*entry)));
+
+	/* Make sure we read the latest start/retire ticks from the GPU */
+	rmb();
+	*start = entry->started;
+	*retire = entry->retired;
+}
+
+static void retire_cmdobj(struct adreno_device *adreno_dev,
+		struct kgsl_drawobj_cmd *cmdobj)
+{
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+	struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(drawobj->context);
+	uint64_t start = 0, end = 0;
+
+	if (cmdobj->fault_recovery != 0) {
+		set_bit(ADRENO_CONTEXT_FAULT, &drawobj->context->priv);
+		_print_recovery(KGSL_DEVICE(adreno_dev), cmdobj);
+	}
+
+	if (test_bit(CMDOBJ_PROFILE, &cmdobj->priv))
+		cmdobj_profile_ticks(adreno_dev, cmdobj, &start, &end);
+
+	/*
+	 * On A3xx we still get the rptr from the CP_RB_RPTR register instead
+	 * of the rptr scratch-out address. At this point the GPU clocks are
+	 * turned off, so avoid reading GPU registers directly on A3xx.
+	 */
+	if (adreno_is_a3xx(adreno_dev))
+		trace_adreno_cmdbatch_retired(drawobj,
+			(int) dispatcher->inflight, start, end,
+			ADRENO_DRAWOBJ_RB(drawobj), 0, cmdobj->fault_recovery);
+	else
+		trace_adreno_cmdbatch_retired(drawobj,
+			(int) dispatcher->inflight, start, end,
+			ADRENO_DRAWOBJ_RB(drawobj),
+			adreno_get_rptr(drawctxt->rb), cmdobj->fault_recovery);
+
+	drawctxt->submit_retire_ticks[drawctxt->ticks_index] =
+		end - cmdobj->submit_ticks;
+
+	drawctxt->ticks_index = (drawctxt->ticks_index + 1) %
+		SUBMIT_RETIRE_TICKS_SIZE;
+
+	kgsl_drawobj_destroy(drawobj);
+}
+
+static int adreno_dispatch_retire_drawqueue(struct adreno_device *adreno_dev,
+		struct adreno_dispatcher_drawqueue *drawqueue)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+	int count = 0;
+
+	while (!adreno_drawqueue_is_empty(drawqueue)) {
+		struct kgsl_drawobj_cmd *cmdobj =
+			drawqueue->cmd_q[drawqueue->head];
+		struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+
+		if (!kgsl_check_timestamp(device, drawobj->context,
+			drawobj->timestamp))
+			break;
+
+		retire_cmdobj(adreno_dev, cmdobj);
+
+		dispatcher->inflight--;
+		drawqueue->inflight--;
+
+		drawqueue->cmd_q[drawqueue->head] = NULL;
+
+		drawqueue->head = DRAWQUEUE_NEXT(drawqueue->head,
+			ADRENO_DISPATCH_DRAWQUEUE_SIZE);
+
+		count++;
+	}
+
+	return count;
+}
+
+static void _adreno_dispatch_check_timeout(struct adreno_device *adreno_dev,
+		struct adreno_dispatcher_drawqueue *drawqueue)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_drawobj *drawobj =
+			DRAWOBJ(drawqueue->cmd_q[drawqueue->head]);
+
+	/* Don't timeout if the timer hasn't expired yet (duh) */
+	if (time_is_after_jiffies(drawqueue->expires))
+		return;
+
+	/* Don't timeout if the IB timeout is disabled globally */
+	if (!adreno_long_ib_detect(adreno_dev))
+		return;
+
+	/* Don't time out if the context has disabled it */
+	if (drawobj->context->flags & KGSL_CONTEXT_NO_FAULT_TOLERANCE)
+		return;
+
+	pr_context(device, drawobj->context, "gpu timeout ctx %d ts %d\n",
+		drawobj->context->id, drawobj->timestamp);
+
+	adreno_set_gpu_fault(adreno_dev, ADRENO_TIMEOUT_FAULT);
+}
+
+static int adreno_dispatch_process_drawqueue(struct adreno_device *adreno_dev,
+		struct adreno_dispatcher_drawqueue *drawqueue)
+{
+	int count = adreno_dispatch_retire_drawqueue(adreno_dev, drawqueue);
+
+	/* Nothing to do if there are no pending commands */
+	if (adreno_drawqueue_is_empty(drawqueue))
+		return count;
+
+	/* Don't update the drawqueue timeout if we are about to preempt out */
+	if (!adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE))
+		return count;
+
+	/* Don't update the drawqueue timeout if it isn't active */
+	if (!drawqueue_is_current(drawqueue))
+		return count;
+
+	/*
+	 * If the current ringbuffer retired any commands then universally
+	 * reset the timeout
+	 */
+
+	if (count) {
+		drawqueue->expires = jiffies +
+			msecs_to_jiffies(adreno_drawobj_timeout);
+		return count;
+	}
+
+	/*
+	 * If we get here then 1) the ringbuffer is current and 2) we haven't
+	 * retired anything.  Check to see if the timeout is valid for the
+	 * current drawobj and fault if it has expired
+	 */
+	_adreno_dispatch_check_timeout(adreno_dev, drawqueue);
+	return 0;
+}
+
+/* Update the dispatcher timers */
+static void _dispatcher_update_timers(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+
+	/* Kick the idle timer */
+	mutex_lock(&device->mutex);
+	kgsl_pwrscale_update(device);
+	mod_timer(&device->idle_timer,
+		jiffies + device->pwrctrl.interval_timeout);
+	mutex_unlock(&device->mutex);
+
+	/* Check to see if we need to update the command timer */
+	if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE)) {
+		struct adreno_dispatcher_drawqueue *drawqueue =
+			DRAWQUEUE(adreno_dev->cur_rb);
+
+		if (!adreno_drawqueue_is_empty(drawqueue))
+			mod_timer(&dispatcher->timer, drawqueue->expires);
+	}
+}
+
+/* Take down the dispatcher and release any power states */
+static void _dispatcher_power_down(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+
+	mutex_lock(&device->mutex);
+
+	if (test_and_clear_bit(ADRENO_DISPATCHER_ACTIVE, &dispatcher->priv))
+		complete_all(&dispatcher->idle_gate);
+
+	del_timer_sync(&dispatcher->fault_timer);
+
+	if (test_bit(ADRENO_DISPATCHER_POWER, &dispatcher->priv)) {
+		kgsl_active_count_put(device);
+		clear_bit(ADRENO_DISPATCHER_POWER, &dispatcher->priv);
+	}
+
+	mutex_unlock(&device->mutex);
+}
+
+static void adreno_dispatcher_work(struct work_struct *work)
+{
+	struct adreno_dispatcher *dispatcher =
+		container_of(work, struct adreno_dispatcher, work);
+	struct adreno_device *adreno_dev =
+		container_of(dispatcher, struct adreno_device, dispatcher);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	int count = 0;
+	unsigned int i = 0;
+
+	mutex_lock(&dispatcher->mutex);
+
+	/*
+	 * As long as there are inflight commands, process retired commands
+	 * from all drawqueues
+	 */
+	for (i = 0; i < adreno_dev->num_ringbuffers; i++) {
+		struct adreno_dispatcher_drawqueue *drawqueue =
+			DRAWQUEUE(&adreno_dev->ringbuffers[i]);
+
+		count += adreno_dispatch_process_drawqueue(adreno_dev,
+			drawqueue);
+		if (dispatcher->inflight == 0)
+			break;
+	}
+
+	kgsl_process_event_groups(device);
+
+	/*
+	 * dispatcher_do_fault() returns 0 if no faults occurred. If that is the
+	 * case, then clean up preemption and try to schedule more work
+	 */
+	if (dispatcher_do_fault(adreno_dev) == 0) {
+
+		/* Clean up after preemption */
+		if (gpudev->preemption_schedule)
+			gpudev->preemption_schedule(adreno_dev);
+
+		/* Run the scheduler to dispatch new commands */
+		_adreno_dispatcher_issuecmds(adreno_dev);
+	}
+
+	/*
+	 * If there are commands pending, update the timers, otherwise release
+	 * the power state to prepare for power down
+	 */
+	if (dispatcher->inflight > 0)
+		_dispatcher_update_timers(adreno_dev);
+	else
+		_dispatcher_power_down(adreno_dev);
+
+	mutex_unlock(&dispatcher->mutex);
+}
+
+void adreno_dispatcher_schedule(struct kgsl_device *device)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+
+	kgsl_schedule_work(&dispatcher->work);
+}
+
+/**
+ * adreno_dispatcher_queue_context() - schedule a drawctxt in the dispatcher
+ * @device: pointer to the KGSL device
+ * @drawctxt: pointer to the drawctxt to schedule
+ *
+ * Put a draw context on the dispatcher pending queue and schedule the
+ * dispatcher. This is used to reschedule contexts that might have been
+ * blocked for sync points or other concerns
+ */
+void adreno_dispatcher_queue_context(struct kgsl_device *device,
+	struct adreno_context *drawctxt)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	dispatcher_queue_context(adreno_dev, drawctxt);
+	adreno_dispatcher_schedule(device);
+}
+
+/*
+ * This is called on a regular basis while cmdobj's are inflight.  Fault
+ * detection registers are read and compared to the existing values - if they
+ * changed then the GPU is still running.  If they are the same between
+ * subsequent calls then the GPU may have faulted
+ */
+
+static void adreno_dispatcher_fault_timer(unsigned long data)
+{
+	struct adreno_device *adreno_dev = (struct adreno_device *) data;
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+
+	/* Leave if the user decided to turn off fast hang detection */
+	if (!adreno_soft_fault_detect(adreno_dev))
+		return;
+
+	if (adreno_gpu_fault(adreno_dev)) {
+		adreno_dispatcher_schedule(KGSL_DEVICE(adreno_dev));
+		return;
+	}
+
+	/*
+	 * Read the fault registers - if fault_detect_read_compare() returns 0
+	 * then they haven't changed, so mark the dispatcher as faulted and
+	 * schedule the work loop.
+	 */
+
+	if (!fault_detect_read_compare(adreno_dev)) {
+		adreno_set_gpu_fault(adreno_dev, ADRENO_SOFT_FAULT);
+		adreno_dispatcher_schedule(KGSL_DEVICE(adreno_dev));
+	} else {
+		mod_timer(&dispatcher->fault_timer,
+			jiffies + msecs_to_jiffies(_fault_timer_interval));
+	}
+}
+
+/*
+ * This is called when the timer expires - it either means the GPU is hung or
+ * the IB is taking too long to execute
+ */
+static void adreno_dispatcher_timer(unsigned long data)
+{
+	struct adreno_device *adreno_dev = (struct adreno_device *) data;
+
+	adreno_dispatcher_schedule(KGSL_DEVICE(adreno_dev));
+}
+
+/**
+ * adreno_dispatcher_start() - activate the dispatcher
+ * @device: pointer to the KGSL device structure
+ */
+void adreno_dispatcher_start(struct kgsl_device *device)
+{
+	complete_all(&device->halt_gate);
+
+	/* Schedule the work loop to get things going */
+	adreno_dispatcher_schedule(device);
+}
+
+/**
+ * adreno_dispatcher_stop() - stop the dispatcher
+ * @adreno_dev: pointer to the adreno device structure
+ *
+ * Stop the dispatcher and close all the timers
+ */
+void adreno_dispatcher_stop(struct adreno_device *adreno_dev)
+{
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+
+	del_timer_sync(&dispatcher->timer);
+	del_timer_sync(&dispatcher->fault_timer);
+}
+
+/**
+ * adreno_dispatcher_close() - close the dispatcher
+ * @adreno_dev: pointer to the adreno device structure
+ *
+ * Close the dispatcher and free all the outstanding commands and memory
+ */
+void adreno_dispatcher_close(struct adreno_device *adreno_dev)
+{
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+	int i;
+	struct adreno_ringbuffer *rb;
+
+	mutex_lock(&dispatcher->mutex);
+	del_timer_sync(&dispatcher->timer);
+	del_timer_sync(&dispatcher->fault_timer);
+
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+		struct adreno_dispatcher_drawqueue *dispatch_q =
+			&(rb->dispatch_q);
+		while (!adreno_drawqueue_is_empty(dispatch_q)) {
+			kgsl_drawobj_destroy(
+				DRAWOBJ(dispatch_q->cmd_q[dispatch_q->head]));
+			dispatch_q->head = (dispatch_q->head + 1)
+				% ADRENO_DISPATCH_DRAWQUEUE_SIZE;
+		}
+	}
+
+	mutex_unlock(&dispatcher->mutex);
+
+	kobject_put(&dispatcher->kobj);
+}
+
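+/*
+ * Sysfs attribute wrapper for the dispatcher tunables: @value points at the
+ * backing unsigned int and @max (if non-zero) caps the values that may be
+ * written.
+ */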
+struct dispatcher_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct adreno_dispatcher *,
+			struct dispatcher_attribute *, char *);
+	ssize_t (*store)(struct adreno_dispatcher *,
+			struct dispatcher_attribute *, const char *buf,
+			size_t count);
+	unsigned int max;
+	unsigned int *value;
+};
+
+#define DISPATCHER_UINT_ATTR(_name, _mode, _max, _value) \
+	struct dispatcher_attribute dispatcher_attr_##_name =  { \
+		.attr = { .name = __stringify(_name), .mode = _mode }, \
+		.show = _show_uint, \
+		.store = _store_uint, \
+		.max = _max, \
+		.value = &(_value), \
+	}
+
+#define to_dispatcher_attr(_a) \
+	container_of((_a), struct dispatcher_attribute, attr)
+#define to_dispatcher(k) container_of(k, struct adreno_dispatcher, kobj)
+
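+/* sysfs store handler: parse an unsigned int and apply it if in range */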
+static ssize_t _store_uint(struct adreno_dispatcher *dispatcher,
+		struct dispatcher_attribute *attr,
+		const char *buf, size_t size)
+{
+	unsigned int val = 0;
+	int ret;
+
+	ret = kgsl_sysfs_store(buf, &val);
+	if (ret)
+		return ret;
+
+	if (!val || (attr->max && (val > attr->max)))
+		return -EINVAL;
+
+	*((unsigned int *) attr->value) = val;
+	return size;
+}
+
+static ssize_t _show_uint(struct adreno_dispatcher *dispatcher,
+		struct dispatcher_attribute *attr,
+		char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%u\n",
+		*((unsigned int *) attr->value));
+}
+
+static DISPATCHER_UINT_ATTR(inflight, 0644, ADRENO_DISPATCH_DRAWQUEUE_SIZE,
+	_dispatcher_q_inflight_hi);
+
+static DISPATCHER_UINT_ATTR(inflight_low_latency, 0644,
+	ADRENO_DISPATCH_DRAWQUEUE_SIZE, _dispatcher_q_inflight_lo);
+/*
+ * Our code that "puts back" a command from the context is much cleaner
+ * if we are sure that there will always be enough room in the
+ * ringbuffer so restrict the maximum size of the context queue to
+ * ADRENO_CONTEXT_DRAWQUEUE_SIZE - 1
+ */
+static DISPATCHER_UINT_ATTR(context_drawqueue_size, 0644,
+	ADRENO_CONTEXT_DRAWQUEUE_SIZE - 1, _context_drawqueue_size);
+static DISPATCHER_UINT_ATTR(context_burst_count, 0644, 0,
+	_context_drawobj_burst);
+static DISPATCHER_UINT_ATTR(drawobj_timeout, 0644, 0,
+	adreno_drawobj_timeout);
+static DISPATCHER_UINT_ATTR(context_queue_wait, 0644, 0, _context_queue_wait);
+static DISPATCHER_UINT_ATTR(fault_detect_interval, 0644, 0,
+	_fault_timer_interval);
+static DISPATCHER_UINT_ATTR(fault_throttle_time, 0644, 0,
+	_fault_throttle_time);
+static DISPATCHER_UINT_ATTR(fault_throttle_burst, 0644, 0,
+	_fault_throttle_burst);
+static DISPATCHER_UINT_ATTR(disp_preempt_fair_sched, 0644, 0,
+	adreno_disp_preempt_fair_sched);
+static DISPATCHER_UINT_ATTR(dispatch_time_slice, 0644, 0,
+	adreno_dispatch_time_slice);
+static DISPATCHER_UINT_ATTR(dispatch_starvation_time, 0644, 0,
+	adreno_dispatch_starvation_time);
+
+static struct attribute *dispatcher_attrs[] = {
+	&dispatcher_attr_inflight.attr,
+	&dispatcher_attr_inflight_low_latency.attr,
+	&dispatcher_attr_context_drawqueue_size.attr,
+	&dispatcher_attr_context_burst_count.attr,
+	&dispatcher_attr_drawobj_timeout.attr,
+	&dispatcher_attr_context_queue_wait.attr,
+	&dispatcher_attr_fault_detect_interval.attr,
+	&dispatcher_attr_fault_throttle_time.attr,
+	&dispatcher_attr_fault_throttle_burst.attr,
+	&dispatcher_attr_disp_preempt_fair_sched.attr,
+	&dispatcher_attr_dispatch_time_slice.attr,
+	&dispatcher_attr_dispatch_starvation_time.attr,
+	NULL,
+};
+
+static ssize_t dispatcher_sysfs_show(struct kobject *kobj,
+				   struct attribute *attr, char *buf)
+{
+	struct adreno_dispatcher *dispatcher = to_dispatcher(kobj);
+	struct dispatcher_attribute *pattr = to_dispatcher_attr(attr);
+	ssize_t ret = -EIO;
+
+	if (pattr->show)
+		ret = pattr->show(dispatcher, pattr, buf);
+
+	return ret;
+}
+
+static ssize_t dispatcher_sysfs_store(struct kobject *kobj,
+				    struct attribute *attr,
+				    const char *buf, size_t count)
+{
+	struct adreno_dispatcher *dispatcher = to_dispatcher(kobj);
+	struct dispatcher_attribute *pattr = to_dispatcher_attr(attr);
+	ssize_t ret = -EIO;
+
+	if (pattr->store)
+		ret = pattr->store(dispatcher, pattr, buf, count);
+
+	return ret;
+}
+
+static const struct sysfs_ops dispatcher_sysfs_ops = {
+	.show = dispatcher_sysfs_show,
+	.store = dispatcher_sysfs_store
+};
+
+static struct kobj_type ktype_dispatcher = {
+	.sysfs_ops = &dispatcher_sysfs_ops,
+	.default_attrs = dispatcher_attrs,
+};
+
+/**
+ * adreno_dispatcher_init() - Initialize the dispatcher
+ * @adreno_dev: pointer to the adreno device structure
+ *
+ * Initialize the dispatcher
+ */
+int adreno_dispatcher_init(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+	int ret;
+
+	memset(dispatcher, 0, sizeof(*dispatcher));
+
+	mutex_init(&dispatcher->mutex);
+
+	setup_timer(&dispatcher->timer, adreno_dispatcher_timer,
+		(unsigned long) adreno_dev);
+
+	setup_timer(&dispatcher->fault_timer, adreno_dispatcher_fault_timer,
+		(unsigned long) adreno_dev);
+
+	INIT_WORK(&dispatcher->work, adreno_dispatcher_work);
+
+	init_completion(&dispatcher->idle_gate);
+	complete_all(&dispatcher->idle_gate);
+
+	plist_head_init(&dispatcher->pending);
+	spin_lock_init(&dispatcher->plist_lock);
+
+	ret = kobject_init_and_add(&dispatcher->kobj, &ktype_dispatcher,
+		&device->dev->kobj, "dispatch");
+
+	return ret;
+}
+
+/**
+ * adreno_dispatcher_idle() - Wait for dispatcher to idle
+ * @adreno_dev: Adreno device whose dispatcher needs to idle
+ *
+ * Signal dispatcher to stop sending more commands and complete
+ * the commands that have already been submitted. This function
+ * should not be called when dispatcher mutex is held.
+ * The caller must hold the device mutex.
+ */
+int adreno_dispatcher_idle(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_dispatcher *dispatcher = &adreno_dev->dispatcher;
+	int ret;
+
+	if (!test_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv))
+		return 0;
+
+	/*
+	 * Ensure that this function is not called when dispatcher
+	 * mutex is held and device is started
+	 */
+	if (mutex_is_locked(&dispatcher->mutex) &&
+		dispatcher->mutex.owner == current)
+		return -EDEADLK;
+
+	adreno_get_gpu_halt(adreno_dev);
+
+	mutex_unlock(&device->mutex);
+
+	ret = wait_for_completion_timeout(&dispatcher->idle_gate,
+			msecs_to_jiffies(ADRENO_IDLE_TIMEOUT));
+	if (ret == 0) {
+		ret = -ETIMEDOUT;
+		WARN(1, "Dispatcher halt timeout ");
+	} else if (ret < 0) {
+		KGSL_DRV_ERR(device, "Dispatcher halt failed %d\n", ret);
+	} else {
+		ret = 0;
+	}
+
+	mutex_lock(&device->mutex);
+	adreno_put_gpu_halt(adreno_dev);
+	/*
+	 * requeue dispatcher work to resubmit pending commands
+	 * that may have been blocked due to this idling request
+	 */
+	adreno_dispatcher_schedule(device);
+	return ret;
+}
diff --git a/drivers/gpu/msm/adreno_dispatch.h b/drivers/gpu/msm/adreno_dispatch.h
new file mode 100644
index 0000000..cb9106f
--- /dev/null
+++ b/drivers/gpu/msm/adreno_dispatch.h
@@ -0,0 +1,130 @@
+/* Copyright (c) 2008-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef ____ADRENO_DISPATCHER_H
+#define ____ADRENO_DISPATCHER_H
+
+extern unsigned int adreno_disp_preempt_fair_sched;
+extern unsigned int adreno_drawobj_timeout;
+extern unsigned int adreno_dispatch_starvation_time;
+extern unsigned int adreno_dispatch_time_slice;
+
+/**
+ * enum adreno_dispatcher_starve_timer_states - Starvation control states of
+ * a RB
+ * @ADRENO_DISPATCHER_RB_STARVE_TIMER_UNINIT: Uninitialized, starvation control
+ * is not operating
+ * @ADRENO_DISPATCHER_RB_STARVE_TIMER_INIT: Starvation timer is initialized
+ * and counting
+ * @ADRENO_DISPATCHER_RB_STARVE_TIMER_ELAPSED: The starvation timer has
+ * elapsed; this state indicates that the RB is starved
+ * @ADRENO_DISPATCHER_RB_STARVE_TIMER_SCHEDULED: RB is scheduled on the device
+ * and will remain scheduled for a minimum time slice when in this state.
+ */
+enum adreno_dispatcher_starve_timer_states {
+	ADRENO_DISPATCHER_RB_STARVE_TIMER_UNINIT = 0,
+	ADRENO_DISPATCHER_RB_STARVE_TIMER_INIT = 1,
+	ADRENO_DISPATCHER_RB_STARVE_TIMER_ELAPSED = 2,
+	ADRENO_DISPATCHER_RB_STARVE_TIMER_SCHEDULED = 3,
+};
+
+/*
+ * Maximum size of the dispatcher ringbuffer - the actual inflight size will
+ * be smaller than this, but this size allows a larger range of inflight
+ * sizes to be chosen at runtime
+ */
+
+#define ADRENO_DISPATCH_DRAWQUEUE_SIZE 128
+
+#define DRAWQUEUE_NEXT(_i, _s) (((_i) + 1) % (_s))
+
+/**
+ * struct adreno_dispatcher_drawqueue - List of commands for a RB level
+ * @cmd_q: List of command obj's submitted to dispatcher
+ * @inflight: Number of commands inflight in this q
+ * @head: Head pointer to the q
+ * @tail: Tail pointer of the queue
+ * @active_context_count: Number of active contexts seen in this rb drawqueue
+ * @expires: The jiffies value at which this drawqueue has run too long
+ */
+struct adreno_dispatcher_drawqueue {
+	struct kgsl_drawobj_cmd *cmd_q[ADRENO_DISPATCH_DRAWQUEUE_SIZE];
+	unsigned int inflight;
+	unsigned int head;
+	unsigned int tail;
+	int active_context_count;
+	unsigned long expires;
+};
+
+/**
+ * struct adreno_dispatcher - container for the adreno GPU dispatcher
+ * @mutex: Mutex to protect the structure
+ * @priv: Private flags for the dispatcher (power and active state bits)
+ * @timer: Timer to monitor the progress of the drawobjs
+ * @fault_timer: Timer used for fast hang (soft fault) detection while
+ * drawobjs are inflight
+ * @inflight: Number of drawobj operations pending in the ringbuffer
+ * @fault: Non-zero if a fault was detected.
+ * @pending: Priority list of contexts waiting to submit drawobjs
+ * @plist_lock: Spin lock to protect the pending queue
+ * @work: work_struct to put the dispatcher in a work queue
+ * @kobj: kobject for the dispatcher directory in the device sysfs node
+ * @idle_gate: Gate to wait on for dispatcher to idle
+ * @disp_preempt_fair_sched: If set then dispatcher will try to be fair to
+ * starving RB's by scheduling them in and enforcing a minimum time slice
+ * for every RB that is scheduled to run on the device
+ */
+struct adreno_dispatcher {
+	struct mutex mutex;
+	unsigned long priv;
+	struct timer_list timer;
+	struct timer_list fault_timer;
+	unsigned int inflight;
+	atomic_t fault;
+	struct plist_head pending;
+	spinlock_t plist_lock;
+	struct work_struct work;
+	struct kobject kobj;
+	struct completion idle_gate;
+	unsigned int disp_preempt_fair_sched;
+};
+
+enum adreno_dispatcher_flags {
+	ADRENO_DISPATCHER_POWER = 0,
+	ADRENO_DISPATCHER_ACTIVE = 1,
+};
+
+void adreno_dispatcher_start(struct kgsl_device *device);
+int adreno_dispatcher_init(struct adreno_device *adreno_dev);
+void adreno_dispatcher_close(struct adreno_device *adreno_dev);
+int adreno_dispatcher_idle(struct adreno_device *adreno_dev);
+void adreno_dispatcher_irq_fault(struct adreno_device *adreno_dev);
+void adreno_dispatcher_stop(struct adreno_device *adreno_dev);
+
+int adreno_dispatcher_queue_cmds(struct kgsl_device_private *dev_priv,
+		struct kgsl_context *context, struct kgsl_drawobj *drawobj[],
+		uint32_t count, uint32_t *timestamp);
+
+void adreno_dispatcher_schedule(struct kgsl_device *device);
+void adreno_dispatcher_pause(struct adreno_device *adreno_dev);
+void adreno_dispatcher_queue_context(struct kgsl_device *device,
+		struct adreno_context *drawctxt);
+void adreno_dispatcher_preempt_callback(struct adreno_device *adreno_dev,
+					int bit);
+void adreno_preempt_process_dispatch_queue(struct adreno_device *adreno_dev,
+	struct adreno_dispatcher_drawqueue *dispatch_q);
+
+static inline bool adreno_drawqueue_is_empty(
+		struct adreno_dispatcher_drawqueue *drawqueue)
+{
+	return (drawqueue != NULL && drawqueue->head == drawqueue->tail);
+}
+#endif /* ____ADRENO_DISPATCHER_H */
diff --git a/drivers/gpu/msm/adreno_drawctxt.c b/drivers/gpu/msm/adreno_drawctxt.c
new file mode 100644
index 0000000..4da6763
--- /dev/null
+++ b/drivers/gpu/msm/adreno_drawctxt.c
@@ -0,0 +1,620 @@
+/* Copyright (c) 2002,2007-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/msm_kgsl.h>
+#include <linux/sched.h>
+#include <linux/debugfs.h>
+
+#include "kgsl.h"
+#include "kgsl_sharedmem.h"
+#include "adreno.h"
+#include "adreno_trace.h"
+
+static void wait_callback(struct kgsl_device *device,
+		struct kgsl_event_group *group, void *priv, int result)
+{
+	struct adreno_context *drawctxt = priv;
+
+	wake_up_all(&drawctxt->waiting);
+}
+
+static int _check_context_timestamp(struct kgsl_device *device,
+		struct kgsl_context *context, unsigned int timestamp)
+{
+	/* Bail if the drawctxt has been invalidated or destroyed */
+	if (kgsl_context_detached(context) || kgsl_context_invalid(context))
+		return 1;
+
+	return kgsl_check_timestamp(device, context, timestamp);
+}
+
+/**
+ * adreno_drawctxt_dump() - dump information about a draw context
+ * @device: KGSL device that owns the context
+ * @context: KGSL context to dump information about
+ *
+ * Dump specific information about the context to the kernel log.  Used for
+ * fence timeout callbacks
+ */
+void adreno_drawctxt_dump(struct kgsl_device *device,
+		struct kgsl_context *context)
+{
+	unsigned int queue, start, retire;
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(context);
+	int index, pos;
+	char buf[120];
+
+	kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_QUEUED, &queue);
+	kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_CONSUMED, &start);
+	kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, &retire);
+
+	/*
+	 * The kgsl sync obj timer may be running and it uses the same lock,
+	 * so take the lock with softirqs disabled (bh) to avoid spin lock
+	 * recursion.
+	 *
+	 * Use spin_trylock_bh because the dispatcher can acquire
+	 * drawctxt->lock if the context is pending and the fence it is
+	 * waiting on just got signalled. The dispatcher acquires
+	 * drawctxt->lock and tries to delete the sync obj timer using
+	 * del_timer_sync(), which waits until the timer and its pending
+	 * handlers are done. But if the timer expires at the same time, the
+	 * timer handler could be waiting on drawctxt->lock, leading to a
+	 * deadlock. spin_trylock_bh prevents this.
+	 */
+	if (!spin_trylock_bh(&drawctxt->lock)) {
+		dev_err(device->dev, "  context[%d]: could not get lock\n",
+			context->id);
+		return;
+	}
+
+	dev_err(device->dev,
+		"  context[%d]: queue=%d, submit=%d, start=%d, retire=%d\n",
+		context->id, queue, drawctxt->submitted_timestamp,
+		start, retire);
+
+	if (drawctxt->drawqueue_head != drawctxt->drawqueue_tail) {
+		struct kgsl_drawobj *drawobj =
+			drawctxt->drawqueue[drawctxt->drawqueue_head];
+
+		if (test_bit(ADRENO_CONTEXT_FENCE_LOG, &context->priv)) {
+			dev_err(device->dev,
+				"  possible deadlock. Context %d might be blocked for itself\n",
+				context->id);
+			goto stats;
+		}
+
+		if (drawobj->type == SYNCOBJ_TYPE) {
+			struct kgsl_drawobj_sync *syncobj = SYNCOBJ(drawobj);
+
+			if (kgsl_drawobj_events_pending(syncobj)) {
+				dev_err(device->dev,
+					"  context[%d] (ts=%d) Active sync points:\n",
+					context->id, drawobj->timestamp);
+
+				kgsl_dump_syncpoints(device, syncobj);
+			}
+		}
+	}
+
+stats:
+	memset(buf, 0, sizeof(buf));
+
+	pos = 0;
+
+	for (index = 0; index < SUBMIT_RETIRE_TICKS_SIZE; index++) {
+		uint64_t msecs;
+		unsigned int usecs;
+
+		if (!drawctxt->submit_retire_ticks[index])
+			continue;
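+		/*
+		 * submit_retire_ticks is in ticks of the 19.2 MHz always-on
+		 * counter: ticks * 10 / 192 gives microseconds, which are
+		 * then split into milliseconds and a microsecond remainder.
+		 */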
+		msecs = drawctxt->submit_retire_ticks[index] * 10;
+		usecs = do_div(msecs, 192);
+		usecs = do_div(msecs, 1000);
+		pos += snprintf(buf + pos, sizeof(buf) - pos, "%d.%0d ",
+			(unsigned int)msecs, usecs);
+	}
+	dev_err(device->dev, "  context[%d]: submit times: %s\n",
+		context->id, buf);
+
+	spin_unlock_bh(&drawctxt->lock);
+}
+
+/**
+ * adreno_drawctxt_wait() - sleep until a timestamp expires
+ * @adreno_dev: pointer to the adreno_device struct
+ * @context: Pointer to the KGSL context to sleep for
+ * @timestamp: Timestamp to wait on
+ * @timeout: Number of milliseconds to wait (0 for infinite)
+ *
+ * Register an event to wait for a timestamp on a context and sleep until it
+ * has passed.  Returns < 0 on error, -ETIMEDOUT if the timeout expires or 0
+ * on success
+ */
+int adreno_drawctxt_wait(struct adreno_device *adreno_dev,
+		struct kgsl_context *context,
+		uint32_t timestamp, unsigned int timeout)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(context);
+	int ret;
+	long ret_temp;
+
+	if (kgsl_context_detached(context))
+		return -ENOENT;
+
+	if (kgsl_context_invalid(context))
+		return -EDEADLK;
+
+	trace_adreno_drawctxt_wait_start(-1, context->id, timestamp);
+
+	ret = kgsl_add_event(device, &context->events, timestamp,
+		wait_callback, (void *) drawctxt);
+	if (ret)
+		goto done;
+
+	/*
+	 * If timeout is 0, wait forever. msecs_to_jiffies will force
+	 * values larger than INT_MAX to an infinite timeout.
+	 */
+	if (timeout == 0)
+		timeout = UINT_MAX;
+
+	ret_temp = wait_event_interruptible_timeout(drawctxt->waiting,
+			_check_context_timestamp(device, context, timestamp),
+			msecs_to_jiffies(timeout));
+
+	if (ret_temp == 0) {
+		ret = -ETIMEDOUT;
+		goto done;
+	} else if (ret_temp < 0) {
+		ret = (int) ret_temp;
+		goto done;
+	}
+	ret = 0;
+
+	/* -EDEADLK if the context was invalidated while we were waiting */
+	if (kgsl_context_invalid(context))
+		ret = -EDEADLK;
+
+
+	/* Return -ENOENT if the context was detached while we were waiting */
+	if (kgsl_context_detached(context))
+		ret = -ENOENT;
+
+done:
+	trace_adreno_drawctxt_wait_done(-1, context->id, timestamp, ret);
+	return ret;
+}
+
+/**
+ * adreno_drawctxt_wait_rb() - Wait for the last RB timestamp at which this
+ * context submitted a command to the corresponding RB
+ * @adreno_dev: The device on which the timestamp is active
+ * @context: The context which submitted the command to the RB
+ * @timestamp: The RB timestamp of the last command submitted to the RB by
+ * the context
+ * @timeout: Timeout value for the wait
+ *
+ * Caller must hold the device mutex
+ */
+static int adreno_drawctxt_wait_rb(struct adreno_device *adreno_dev,
+		struct kgsl_context *context,
+		uint32_t timestamp, unsigned int timeout)
+{
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(context);
+	int ret = 0;
+
+	/*
+	 * If the context is invalid then return immediately - we may end up
+	 * waiting for a timestamp that will never come
+	 */
+	if (kgsl_context_invalid(context))
+		goto done;
+
+	trace_adreno_drawctxt_wait_start(drawctxt->rb->id, context->id,
+					timestamp);
+
+	ret = adreno_ringbuffer_waittimestamp(drawctxt->rb, timestamp, timeout);
+done:
+	trace_adreno_drawctxt_wait_done(drawctxt->rb->id, context->id,
+					timestamp, ret);
+	return ret;
+}
+
+static int drawctxt_detach_drawobjs(struct adreno_context *drawctxt,
+		struct kgsl_drawobj **list)
+{
+	int count = 0;
+
+	while (drawctxt->drawqueue_head != drawctxt->drawqueue_tail) {
+		struct kgsl_drawobj *drawobj =
+			drawctxt->drawqueue[drawctxt->drawqueue_head];
+
+		drawctxt->drawqueue_head = (drawctxt->drawqueue_head + 1) %
+			ADRENO_CONTEXT_DRAWQUEUE_SIZE;
+
+		list[count++] = drawobj;
+	}
+
+	return count;
+}
+
+/**
+ * adreno_drawctxt_invalidate() - Invalidate an adreno draw context
+ * @device: Pointer to the KGSL device structure for the GPU
+ * @context: Pointer to the KGSL context structure
+ *
+ * Invalidate the context and remove all queued commands and cancel any pending
+ * waiters
+ */
+void adreno_drawctxt_invalidate(struct kgsl_device *device,
+		struct kgsl_context *context)
+{
+	struct adreno_context *drawctxt = ADRENO_CONTEXT(context);
+	struct kgsl_drawobj *list[ADRENO_CONTEXT_DRAWQUEUE_SIZE];
+	int i, count;
+
+	trace_adreno_drawctxt_invalidate(drawctxt);
+
+	spin_lock(&drawctxt->lock);
+	set_bit(KGSL_CONTEXT_PRIV_INVALID, &context->priv);
+
+	/*
+	 * set the timestamp to the last value since the context is invalidated
+	 * and we want the pending events for this context to go away
+	 */
+	kgsl_sharedmem_writel(device, &device->memstore,
+			KGSL_MEMSTORE_OFFSET(context->id, soptimestamp),
+			drawctxt->timestamp);
+
+	kgsl_sharedmem_writel(device, &device->memstore,
+			KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp),
+			drawctxt->timestamp);
+
+	/* Get rid of commands still waiting in the queue */
+	count = drawctxt_detach_drawobjs(drawctxt, list);
+	spin_unlock(&drawctxt->lock);
+
+	for (i = 0; i < count; i++) {
+		kgsl_cancel_events_timestamp(device, &context->events,
+			list[i]->timestamp);
+		kgsl_drawobj_destroy(list[i]);
+	}
+
+	/* Make sure all pending events are processed or cancelled */
+	kgsl_flush_event_group(device, &context->events);
+
+	/* Give the bad news to everybody waiting around */
+	wake_up_all(&drawctxt->waiting);
+	wake_up_all(&drawctxt->wq);
+}
+
+/*
+ * Set the priority of the context based on the flags passed into context
+ * create.  If the priority is not set in the flags, then the kernel can
+ * assign any priority it desires for the context.
+ */
+#define KGSL_CONTEXT_PRIORITY_MED	0x8
+
+static inline void _set_context_priority(struct adreno_context *drawctxt)
+{
+	/* If the priority is not set by user, set it for them */
+	if ((drawctxt->base.flags & KGSL_CONTEXT_PRIORITY_MASK) ==
+			KGSL_CONTEXT_PRIORITY_UNDEF)
+		drawctxt->base.flags |= (KGSL_CONTEXT_PRIORITY_MED <<
+				KGSL_CONTEXT_PRIORITY_SHIFT);
+
+	/* Store the context priority */
+	drawctxt->base.priority =
+		(drawctxt->base.flags & KGSL_CONTEXT_PRIORITY_MASK) >>
+		KGSL_CONTEXT_PRIORITY_SHIFT;
+}
+
+/**
+ * adreno_drawctxt_create - create a new adreno draw context
+ * @dev_priv: the owner of the context
+ * @flags: flags for the context (passed from user space)
+ *
+ * Create and return a new draw context for the 3D core.
+ */
+struct kgsl_context *
+adreno_drawctxt_create(struct kgsl_device_private *dev_priv,
+			uint32_t *flags)
+{
+	struct adreno_context *drawctxt;
+	struct kgsl_device *device = dev_priv->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	int ret;
+	unsigned long local;
+
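+	/* Mask off everything except the context flags user space may set */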
+	local = *flags & (KGSL_CONTEXT_PREAMBLE |
+		KGSL_CONTEXT_NO_GMEM_ALLOC |
+		KGSL_CONTEXT_PER_CONTEXT_TS |
+		KGSL_CONTEXT_USER_GENERATED_TS |
+		KGSL_CONTEXT_NO_FAULT_TOLERANCE |
+		KGSL_CONTEXT_CTX_SWITCH |
+		KGSL_CONTEXT_PRIORITY_MASK |
+		KGSL_CONTEXT_TYPE_MASK |
+		KGSL_CONTEXT_PWR_CONSTRAINT |
+		KGSL_CONTEXT_IFH_NOP |
+		KGSL_CONTEXT_SECURE |
+		KGSL_CONTEXT_PREEMPT_STYLE_MASK |
+		KGSL_CONTEXT_NO_SNAPSHOT);
+
+	/* Check for errors before trying to initialize */
+
+	/* If preemption is not supported, ignore preemption request */
+	if (!test_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv))
+		local &= ~KGSL_CONTEXT_PREEMPT_STYLE_MASK;
+
+	/* We no longer support legacy context switching */
+	if ((local & KGSL_CONTEXT_PREAMBLE) == 0 ||
+		(local & KGSL_CONTEXT_NO_GMEM_ALLOC) == 0) {
+		KGSL_DEV_ERR_ONCE(device,
+			"legacy context switch not supported\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	/* Make sure that our target can support secure contexts if requested */
+	if (!kgsl_mmu_is_secured(&dev_priv->device->mmu) &&
+			(local & KGSL_CONTEXT_SECURE)) {
+		KGSL_DEV_ERR_ONCE(device, "Secure context not supported\n");
+		return ERR_PTR(-EOPNOTSUPP);
+	}
+
+	drawctxt = kzalloc(sizeof(struct adreno_context), GFP_KERNEL);
+
+	if (drawctxt == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	drawctxt->timestamp = 0;
+
+	drawctxt->base.flags = local;
+
+	/* Always enable per-context timestamps */
+	drawctxt->base.flags |= KGSL_CONTEXT_PER_CONTEXT_TS;
+	drawctxt->type = (drawctxt->base.flags & KGSL_CONTEXT_TYPE_MASK)
+		>> KGSL_CONTEXT_TYPE_SHIFT;
+	spin_lock_init(&drawctxt->lock);
+	init_waitqueue_head(&drawctxt->wq);
+	init_waitqueue_head(&drawctxt->waiting);
+
+	/* Set the context priority */
+	_set_context_priority(drawctxt);
+	/* set the context ringbuffer */
+	drawctxt->rb = adreno_ctx_get_rb(adreno_dev, drawctxt);
+
+	/*
+	 * Set up the plist node for the dispatcher.  Insert the node into the
+	 * drawctxt pending list based on priority.
+	 */
+	plist_node_init(&drawctxt->pending, drawctxt->base.priority);
+
+	/*
+	 * Now initialize the common part of the context. This allocates the
+	 * context id, and then possibly another thread could look it up.
+	 * So we want all of our initialization that doesn't require the
+	 * context id to be done before this call.
+	 */
+	ret = kgsl_context_init(dev_priv, &drawctxt->base);
+	if (ret != 0) {
+		kfree(drawctxt);
+		return ERR_PTR(ret);
+	}
+
+	kgsl_sharedmem_writel(device, &device->memstore,
+			KGSL_MEMSTORE_OFFSET(drawctxt->base.id, soptimestamp),
+			0);
+	kgsl_sharedmem_writel(device, &device->memstore,
+			KGSL_MEMSTORE_OFFSET(drawctxt->base.id, eoptimestamp),
+			0);
+
+	adreno_context_debugfs_init(ADRENO_DEVICE(device), drawctxt);
+
+	INIT_LIST_HEAD(&drawctxt->active_node);
+
+	/* copy back whatever flags we decided were valid */
+	*flags = drawctxt->base.flags;
+	return &drawctxt->base;
+}
+
+/**
+ * adreno_drawctxt_sched() - Schedule a previously blocked context
+ * @device: pointer to a KGSL device
+ * @context: KGSL context to reschedule
+ *
+ * This function is called by the core when it knows that a previously blocked
+ * context has been unblocked.  The default adreno response is to reschedule the
+ * context on the dispatcher
+ */
+void adreno_drawctxt_sched(struct kgsl_device *device,
+		struct kgsl_context *context)
+{
+	adreno_dispatcher_queue_context(device, ADRENO_CONTEXT(context));
+}
+
+/**
+ * adreno_drawctxt_detach(): detach a context from the GPU
+ * @context: Generic KGSL context container for the context
+ *
+ */
+void adreno_drawctxt_detach(struct kgsl_context *context)
+{
+	struct kgsl_device *device;
+	struct adreno_device *adreno_dev;
+	struct adreno_context *drawctxt;
+	struct adreno_ringbuffer *rb;
+	int ret, count, i;
+	struct kgsl_drawobj *list[ADRENO_CONTEXT_DRAWQUEUE_SIZE];
+
+	if (context == NULL)
+		return;
+
+	device = context->device;
+	adreno_dev = ADRENO_DEVICE(device);
+	drawctxt = ADRENO_CONTEXT(context);
+	rb = drawctxt->rb;
+
+	spin_lock(&adreno_dev->active_list_lock);
+	list_del_init(&drawctxt->active_node);
+	spin_unlock(&adreno_dev->active_list_lock);
+
+	spin_lock(&drawctxt->lock);
+	count = drawctxt_detach_drawobjs(drawctxt, list);
+	spin_unlock(&drawctxt->lock);
+
+	for (i = 0; i < count; i++) {
+		/*
+		 * If the context is detached while we are waiting for
+		 * the next command in GFT SKIP CMD, print the context
+		 * detached status here.
+		 */
+		adreno_fault_skipcmd_detached(adreno_dev, drawctxt, list[i]);
+		kgsl_drawobj_destroy(list[i]);
+	}
+
+	/*
+	 * internal_timestamp is set in adreno_ringbuffer_addcmds,
+	 * which holds the device mutex.
+	 */
+	mutex_lock(&device->mutex);
+
+	/*
+	 * Wait for the last global timestamp to pass before continuing.
+	 * The maximum wait time is 30s: some large IBs can take longer than
+	 * 10s, and if a hang happens the time for the context's commands to
+	 * retire will be greater than 10s. 30s should be sufficient time to
+	 * wait for the commands even if a hang happens.
+	 */
+	ret = adreno_drawctxt_wait_rb(adreno_dev, context,
+		drawctxt->internal_timestamp, 30 * 1000);
+
+	/*
+	 * If the wait for global fails due to timeout then nothing after this
+	 * point is likely to work very well - Get GPU snapshot and BUG_ON()
+	 * so we can take advantage of the debug tools to figure out what the
+	 * h - e - double hockey sticks happened. If an EAGAIN error is
+	 * returned then recovery will kick in and there will be no more
+	 * commands in the RB pipe from this context, which is what we are
+	 * waiting for, so ignore the -EAGAIN error
+	 */
+	if (ret && ret != -EAGAIN) {
+		KGSL_DRV_ERR(device, "Wait for global ts=%d type=%d error=%d\n",
+				drawctxt->internal_timestamp,
+				drawctxt->type, ret);
+		device->force_panic = 1;
+		kgsl_device_snapshot(device, context);
+	}
+
+	kgsl_sharedmem_writel(device, &device->memstore,
+			KGSL_MEMSTORE_OFFSET(context->id, soptimestamp),
+			drawctxt->timestamp);
+
+	kgsl_sharedmem_writel(device, &device->memstore,
+			KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp),
+			drawctxt->timestamp);
+
+	adreno_profile_process_results(adreno_dev);
+
+	mutex_unlock(&device->mutex);
+
+	/* wake threads waiting to submit commands from this context */
+	wake_up_all(&drawctxt->waiting);
+	wake_up_all(&drawctxt->wq);
+}
+
+void adreno_drawctxt_destroy(struct kgsl_context *context)
+{
+	struct adreno_context *drawctxt;
+
+	if (context == NULL)
+		return;
+
+	drawctxt = ADRENO_CONTEXT(context);
+	debugfs_remove_recursive(drawctxt->debug_root);
+	kfree(drawctxt);
+}
+
+static void _drawctxt_switch_wait_callback(struct kgsl_device *device,
+		struct kgsl_event_group *group,
+		void *priv, int result)
+{
+	struct adreno_context *drawctxt = (struct adreno_context *) priv;
+
+	kgsl_context_put(&drawctxt->base);
+}
+
+/**
+ * adreno_drawctxt_switch - switch the current draw context in a given RB
+ * @adreno_dev: The 3D device that owns the context
+ * @rb: The ringbuffer on which the current context is being changed
+ * @drawctxt: The 3D context to switch to
+ * @flags: Control flags for the switch
+ *
+ * Switch the current draw context in the given RB.
+ */
+
+int adreno_drawctxt_switch(struct adreno_device *adreno_dev,
+				struct adreno_ringbuffer *rb,
+				struct adreno_context *drawctxt,
+				unsigned int flags)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pagetable *new_pt;
+	int ret = 0;
+
+	/* We always expect a valid rb */
+	if (!rb)
+		return -EINVAL;
+
+	/* already current? */
+	if (rb->drawctxt_active == drawctxt)
+		return ret;
+
+	/*
+	 * Submitting pt switch commands from a detached context can
+	 * lead to a race condition where the pt is destroyed before
+	 * the pt switch commands get executed by the GPU, leading to
+	 * pagefaults.
+	 */
+	if (drawctxt != NULL && kgsl_context_detached(&drawctxt->base))
+		return -ENOENT;
+
+	trace_adreno_drawctxt_switch(rb, drawctxt);
+
+	/* Get a refcount to the new instance */
+	if (drawctxt) {
+		if (!_kgsl_context_get(&drawctxt->base))
+			return -ENOENT;
+
+		new_pt = drawctxt->base.proc_priv->pagetable;
+	} else {
+		/* No context - set the default pagetable and that's it. */
+		new_pt = device->mmu.defaultpagetable;
+	}
+	ret = adreno_ringbuffer_set_pt_ctx(rb, new_pt, drawctxt, flags);
+	if (ret)
+		return ret;
+
+	if (rb->drawctxt_active) {
+		/*
+		 * Release the reference to the old context when its last
+		 * RB timestamp retires; if the event cannot be added, drop
+		 * the reference immediately.
+		 */
+		if (kgsl_add_event(device, &rb->events, rb->timestamp,
+			_drawctxt_switch_wait_callback,
+			rb->drawctxt_active)) {
+			kgsl_context_put(&rb->drawctxt_active->base);
+		}
+	}
+
+	rb->drawctxt_active = drawctxt;
+	return 0;
+}
diff --git a/drivers/gpu/msm/adreno_drawctxt.h b/drivers/gpu/msm/adreno_drawctxt.h
new file mode 100644
index 0000000..aeacf09
--- /dev/null
+++ b/drivers/gpu/msm/adreno_drawctxt.h
@@ -0,0 +1,140 @@
+/* Copyright (c) 2002,2007-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef __ADRENO_DRAWCTXT_H
+#define __ADRENO_DRAWCTXT_H
+
+struct adreno_context_type {
+	unsigned int type;
+	const char *str;
+};
+
+#define ADRENO_CONTEXT_DRAWQUEUE_SIZE 128
+#define SUBMIT_RETIRE_TICKS_SIZE 7
+
+struct kgsl_device;
+struct adreno_device;
+struct kgsl_device_private;
+struct kgsl_context;
+
+/**
+ * struct adreno_context - Adreno GPU draw context
+ * @base: Base KGSL context container
+ * @timestamp: Last issued context-specific timestamp
+ * @internal_timestamp: Global timestamp of the last issued command
+ *			NOTE: guarded by device->mutex, not drawctxt->lock!
+ * @type: Context type (GL, CL, RS)
+ * @lock: Spinlock to protect the drawqueue
+ * @drawqueue: Queue of drawobjs waiting to be dispatched for this
+ *			context
+ * @drawqueue_head: Head of the drawqueue
+ * @drawqueue_tail: Tail of the drawqueue
+ * @pending: Priority list node for the dispatcher list of pending contexts
+ * @wq: Workqueue structure for contexts to sleep pending room in the queue
+ * @waiting: Workqueue structure for contexts waiting for a timestamp or event
+ * @queued: Number of commands queued in the drawqueue
+ * @fault_policy: GFT fault policy set in _skip_cmd();
+ * @debug_root: debugfs entry for this context.
+ * @queued_timestamp: The last timestamp that was queued on this context
+ * @rb: The ringbuffer in which this context submits commands.
+ * @submitted_timestamp: The last timestamp that was submitted for this context
+ * @submit_retire_ticks: Array to hold command obj execution times from submit
+ *                       to retire
+ * @ticks_index: The index into submit_retire_ticks[] where the new delta will
+ *		 be written.
+ * @active_node: Linkage for nodes in active_list
+ * @active_time: Time when this context was last seen
+ */
+struct adreno_context {
+	struct kgsl_context base;
+	unsigned int timestamp;
+	unsigned int internal_timestamp;
+	unsigned int type;
+	spinlock_t lock;
+
+	/* Dispatcher */
+	struct kgsl_drawobj *drawqueue[ADRENO_CONTEXT_DRAWQUEUE_SIZE];
+	unsigned int drawqueue_head;
+	unsigned int drawqueue_tail;
+
+	struct plist_node pending;
+	wait_queue_head_t wq;
+	wait_queue_head_t waiting;
+
+	int queued;
+	unsigned int fault_policy;
+	struct dentry *debug_root;
+	unsigned int queued_timestamp;
+	struct adreno_ringbuffer *rb;
+	unsigned int submitted_timestamp;
+	uint64_t submit_retire_ticks[SUBMIT_RETIRE_TICKS_SIZE];
+	int ticks_index;
+
+	struct list_head active_node;
+	unsigned long active_time;
+};
+
+/* Flag definitions for flag field in adreno_context */
+
+/**
+ * enum adreno_context_priv - Private flags for an adreno draw context
+ * @ADRENO_CONTEXT_FAULT - set if the context has faulted (and recovered)
+ * @ADRENO_CONTEXT_GPU_HANG - Context has caused a GPU hang
+ * @ADRENO_CONTEXT_GPU_HANG_FT - Context has caused a GPU hang
+ *      and fault tolerance was successful
+ * @ADRENO_CONTEXT_SKIP_EOF - Context skip IBs until the next end of frame
+ *      marker.
+ * @ADRENO_CONTEXT_FORCE_PREAMBLE - Force the preamble for the next submission.
+ * @ADRENO_CONTEXT_SKIP_CMD - Context's drawobjs are skipped during
+ *      fault tolerance.
+ * @ADRENO_CONTEXT_FENCE_LOG - Dump fences on this context.
+ */
+enum adreno_context_priv {
+	ADRENO_CONTEXT_FAULT = KGSL_CONTEXT_PRIV_DEVICE_SPECIFIC,
+	ADRENO_CONTEXT_GPU_HANG,
+	ADRENO_CONTEXT_GPU_HANG_FT,
+	ADRENO_CONTEXT_SKIP_EOF,
+	ADRENO_CONTEXT_FORCE_PREAMBLE,
+	ADRENO_CONTEXT_SKIP_CMD,
+	ADRENO_CONTEXT_FENCE_LOG,
+};
+
+/* Flags for adreno_drawctxt_switch() */
+#define ADRENO_CONTEXT_SWITCH_FORCE_GPU BIT(0)
+
+struct kgsl_context *adreno_drawctxt_create(
+			struct kgsl_device_private *dev_priv,
+			uint32_t *flags);
+
+void adreno_drawctxt_detach(struct kgsl_context *context);
+
+void adreno_drawctxt_destroy(struct kgsl_context *context);
+
+void adreno_drawctxt_sched(struct kgsl_device *device,
+		struct kgsl_context *context);
+
+struct adreno_ringbuffer;
+int adreno_drawctxt_switch(struct adreno_device *adreno_dev,
+				struct adreno_ringbuffer *rb,
+				struct adreno_context *drawctxt,
+				unsigned int flags);
+
+int adreno_drawctxt_wait(struct adreno_device *adreno_dev,
+		struct kgsl_context *context,
+		uint32_t timestamp, unsigned int timeout);
+
+void adreno_drawctxt_invalidate(struct kgsl_device *device,
+		struct kgsl_context *context);
+
+void adreno_drawctxt_dump(struct kgsl_device *device,
+		struct kgsl_context *context);
+
+#endif  /* __ADRENO_DRAWCTXT_H */
diff --git a/drivers/gpu/msm/adreno_ioctl.c b/drivers/gpu/msm/adreno_ioctl.c
new file mode 100644
index 0000000..7325bbb
--- /dev/null
+++ b/drivers/gpu/msm/adreno_ioctl.c
@@ -0,0 +1,177 @@
+/* Copyright (c) 2002,2007-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/ioctl.h>
+#include "kgsl_device.h"
+#include "adreno.h"
+#include "adreno_a5xx.h"
+
+long adreno_ioctl_perfcounter_get(struct kgsl_device_private *dev_priv,
+	unsigned int cmd, void *data)
+{
+	struct kgsl_device *device = dev_priv->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_perfcounter_get *get = data;
+	int result;
+
+	mutex_lock(&device->mutex);
+
+	/*
+	 * adreno_perfcounter_get() is also called by kernel clients during
+	 * start(), so it is not safe to take an active count inside that
+	 * function; take the active count here before calling it.
+	 */
+	result = kgsl_active_count_get(device);
+
+	if (result == 0) {
+		result = adreno_perfcounter_get(adreno_dev,
+			get->groupid, get->countable, &get->offset,
+			&get->offset_hi, PERFCOUNTER_FLAG_NONE);
+		kgsl_active_count_put(device);
+	}
+	mutex_unlock(&device->mutex);
+
+	return (long) result;
+}
+
+long adreno_ioctl_perfcounter_put(struct kgsl_device_private *dev_priv,
+		unsigned int cmd, void *data)
+{
+	struct kgsl_device *device = dev_priv->device;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_perfcounter_put *put = data;
+	int result;
+
+	mutex_lock(&device->mutex);
+	result = adreno_perfcounter_put(adreno_dev, put->groupid,
+		put->countable, PERFCOUNTER_FLAG_NONE);
+	mutex_unlock(&device->mutex);
+
+	return (long) result;
+}
+
+static long adreno_ioctl_perfcounter_query(struct kgsl_device_private *dev_priv,
+		unsigned int cmd, void *data)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_priv->device);
+	struct kgsl_perfcounter_query *query = data;
+
+	return (long) adreno_perfcounter_query_group(adreno_dev, query->groupid,
+			query->countables, query->count, &query->max_counters);
+}
+
+static long adreno_ioctl_perfcounter_read(struct kgsl_device_private *dev_priv,
+		unsigned int cmd, void *data)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_priv->device);
+	struct kgsl_perfcounter_read *read = data;
+
+	return (long) adreno_perfcounter_read_group(adreno_dev, read->reads,
+		read->count);
+}
+
+static long adreno_ioctl_preemption_counters_query(
+		struct kgsl_device_private *dev_priv,
+		unsigned int cmd, void *data)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(dev_priv->device);
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	struct kgsl_preemption_counters_query *read = data;
+	int size_level = A5XX_CP_CTXRECORD_PREEMPTION_COUNTER_SIZE;
+	int levels_to_copy;
+
+	if (!adreno_is_a5xx(adreno_dev) ||
+		!adreno_is_preemption_enabled(adreno_dev))
+		return -EOPNOTSUPP;
+
+	if (read->size_user < size_level)
+		return -EINVAL;
+
+	/* Calculate number of preemption counter levels to copy to userspace */
+	levels_to_copy = (read->size_user / size_level);
+	if (levels_to_copy > gpudev->num_prio_levels)
+		levels_to_copy = gpudev->num_prio_levels;
+
+	if (copy_to_user((void __user *) (uintptr_t) read->counters,
+			adreno_dev->preempt.counters.hostptr,
+			levels_to_copy * size_level))
+		return -EFAULT;
+
+	read->max_priority_level = levels_to_copy;
+	read->size_priority_level = size_level;
+
+	return 0;
+}
+
+long adreno_ioctl_helper(struct kgsl_device_private *dev_priv,
+		unsigned int cmd, unsigned long arg,
+		const struct kgsl_ioctl *cmds, int len)
+{
+	unsigned char data[128] = { 0 };
+	long ret;
+	int i;
+
+	static DEFINE_RATELIMIT_STATE(_rs,
+			DEFAULT_RATELIMIT_INTERVAL,
+			DEFAULT_RATELIMIT_BURST);
+
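+	/* Look up the handler by ioctl number; only _IOC_NR is compared */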
+	for (i = 0; i < len; i++) {
+		if (_IOC_NR(cmd) == _IOC_NR(cmds[i].cmd))
+			break;
+	}
+
+	if (i == len) {
+		KGSL_DRV_INFO(dev_priv->device,
+			"invalid ioctl code 0x%08X\n", cmd);
+		return -ENOIOCTLCMD;
+	}
+
+	if (WARN_ON(_IOC_SIZE(cmds[i].cmd) > sizeof(data))) {
+		if (__ratelimit(&_rs))
+			WARN(1, "data too big for ioctl 0x%08X: %d/%ld\n",
+				cmd, _IOC_SIZE(cmds[i].cmd), sizeof(data));
+		return -EINVAL;
+	}
+
+	if (_IOC_SIZE(cmds[i].cmd)) {
+		ret = kgsl_ioctl_copy_in(cmds[i].cmd, cmd, arg, data);
+
+		if (ret)
+			return ret;
+	} else {
+		memset(data, 0, sizeof(data));
+	}
+
+	ret = cmds[i].func(dev_priv, cmd, data);
+
+	if (ret == 0 && _IOC_SIZE(cmds[i].cmd))
+		ret = kgsl_ioctl_copy_out(cmds[i].cmd, cmd, arg, data);
+
+	return ret;
+}
+
+static struct kgsl_ioctl adreno_ioctl_funcs[] = {
+	{ IOCTL_KGSL_PERFCOUNTER_GET, adreno_ioctl_perfcounter_get },
+	{ IOCTL_KGSL_PERFCOUNTER_PUT, adreno_ioctl_perfcounter_put },
+	{ IOCTL_KGSL_PERFCOUNTER_QUERY, adreno_ioctl_perfcounter_query },
+	{ IOCTL_KGSL_PERFCOUNTER_READ, adreno_ioctl_perfcounter_read },
+	{ IOCTL_KGSL_PREEMPTIONCOUNTER_QUERY,
+		adreno_ioctl_preemption_counters_query },
+};
+
+long adreno_ioctl(struct kgsl_device_private *dev_priv,
+			      unsigned int cmd, unsigned long arg)
+{
+	return adreno_ioctl_helper(dev_priv, cmd, arg,
+		adreno_ioctl_funcs, ARRAY_SIZE(adreno_ioctl_funcs));
+}
diff --git a/drivers/gpu/msm/adreno_iommu.c b/drivers/gpu/msm/adreno_iommu.c
new file mode 100644
index 0000000..aa41f11
--- /dev/null
+++ b/drivers/gpu/msm/adreno_iommu.c
@@ -0,0 +1,907 @@
+/* Copyright (c) 2002,2007-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#include "adreno.h"
+#include "kgsl_sharedmem.h"
+#include "a3xx_reg.h"
+#include "adreno_pm4types.h"
+
+#define A5XX_PFP_PER_PROCESS_UCODE_VER 0x5FF064
+#define A5XX_PM4_PER_PROCESS_UCODE_VER 0x5FF052
+
+/*
+ * _wait_reg() - make the CP poll on a register
+ * @adreno_dev:	The adreno device
+ * @cmds:	Pointer to memory where commands are to be added
+ * @addr:	Register address to poll
+ * @val:	Value to poll for
+ * @mask:	Mask applied to the register value before the comparison
+ * @interval:	Poll interval
+ */
+static unsigned int _wait_reg(struct adreno_device *adreno_dev,
+			unsigned int *cmds, unsigned int addr,
+			unsigned int val, unsigned int mask,
+			unsigned int interval)
+{
+	unsigned int *start = cmds;
+
+	if (adreno_is_a3xx(adreno_dev)) {
+		*cmds++ = cp_packet(adreno_dev, CP_WAIT_REG_EQ, 4);
+		*cmds++ = addr;
+		*cmds++ = val;
+		*cmds++ = mask;
+		*cmds++ = interval;
+	} else {
+		*cmds++ = cp_mem_packet(adreno_dev, CP_WAIT_REG_MEM, 5, 1);
+		*cmds++ = 0x3; /* Mem Space = Register,  Function = Equals */
+		cmds += cp_gpuaddr(adreno_dev, cmds, addr); /* Poll address */
+		*cmds++ = val; /* ref val */
+		*cmds++ = mask;
+		*cmds++ = interval;
+
+		/* WAIT_REG_MEM turns back on protected mode - push it off */
+		*cmds++ = cp_packet(adreno_dev, CP_SET_PROTECTED_MODE, 1);
+		*cmds++ = 0;
+	}
+
+	return cmds - start;
+}
+
+#define KGSL_MMU(_dev) \
+	((struct kgsl_mmu *) (&(KGSL_DEVICE((_dev))->mmu)))
+
+static unsigned int  _iommu_lock(struct adreno_device *adreno_dev,
+				 unsigned int *cmds)
+{
+	unsigned int *start = cmds;
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_iommu *iommu = KGSL_IOMMU_PRIV(device);
+
+	/*
+	 * If we don't have this register, probe should have forced
+	 * global pagetables and we shouldn't get here.
+	 */
+	if (WARN_ONCE(iommu->micro_mmu_ctrl == UINT_MAX,
+		"invalid GPU IOMMU lock sequence\n"))
+		return 0;
+
+	/*
+	 * glue commands together until next
+	 * WAIT_FOR_ME
+	 */
+	cmds += _wait_reg(adreno_dev, cmds,
+			adreno_getreg(adreno_dev, ADRENO_REG_CP_WFI_PEND_CTR),
+			1, 0xFFFFFFFF, 0xF);
+
+	/* set the iommu lock bit */
+	*cmds++ = cp_packet(adreno_dev, CP_REG_RMW, 3);
+	*cmds++ = iommu->micro_mmu_ctrl >> 2;
+	/* AND to unmask the lock bit */
+	*cmds++ = ~(KGSL_IOMMU_IMPLDEF_MICRO_MMU_CTRL_HALT);
+	/* OR to set the IOMMU lock bit */
+	*cmds++ = KGSL_IOMMU_IMPLDEF_MICRO_MMU_CTRL_HALT;
+
+	/* wait for smmu to lock */
+	cmds += _wait_reg(adreno_dev, cmds, iommu->micro_mmu_ctrl >> 2,
+			KGSL_IOMMU_IMPLDEF_MICRO_MMU_CTRL_IDLE,
+			KGSL_IOMMU_IMPLDEF_MICRO_MMU_CTRL_IDLE, 0xF);
+
+	return cmds - start;
+}
+
+static unsigned int _iommu_unlock(struct adreno_device *adreno_dev,
+				  unsigned int *cmds)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_iommu *iommu = KGSL_IOMMU_PRIV(device);
+	unsigned int *start = cmds;
+
+	/*
+	 * If we don't have this register, probe should have forced
+	 * global pagetables and we shouldn't get here.
+	 */
+	if (WARN_ONCE(iommu->micro_mmu_ctrl == UINT_MAX,
+		"invalid GPU IOMMU unlock sequence\n"))
+		return 0;
+
+	/* unlock the IOMMU lock */
+	*cmds++ = cp_packet(adreno_dev, CP_REG_RMW, 3);
+	*cmds++ = iommu->micro_mmu_ctrl >> 2;
+	/* AND to unmask the lock bit */
+	*cmds++ = ~(KGSL_IOMMU_IMPLDEF_MICRO_MMU_CTRL_HALT);
+	/* OR with 0 so lock bit is unset */
+	*cmds++ = 0;
+
+	/* release all commands since _iommu_lock() with wait_for_me */
+	cmds += cp_wait_for_me(adreno_dev, cmds);
+
+	return cmds - start;
+}
+
+static unsigned int _vbif_lock(struct adreno_device *adreno_dev,
+			unsigned int *cmds)
+{
+	unsigned int *start = cmds;
+	/*
+	 * glue commands together until next
+	 * WAIT_FOR_ME
+	 */
+	cmds += _wait_reg(adreno_dev, cmds,
+			adreno_getreg(adreno_dev, ADRENO_REG_CP_WFI_PEND_CTR),
+			1, 0xFFFFFFFF, 0xF);
+
+	/* MMU-500 VBIF stall */
+	*cmds++ = cp_packet(adreno_dev, CP_REG_RMW, 3);
+	*cmds++ = A3XX_VBIF_DDR_OUTPUT_RECOVERABLE_HALT_CTRL0;
+	/* AND to unmask the HALT bit */
+	*cmds++ = ~(VBIF_RECOVERABLE_HALT_CTRL);
+	/* OR to set the HALT bit */
+	*cmds++ = 0x1;
+
+	/* Wait for acknowledgment */
+	cmds += _wait_reg(adreno_dev, cmds,
+			A3XX_VBIF_DDR_OUTPUT_RECOVERABLE_HALT_CTRL1,
+			1, 0xFFFFFFFF, 0xF);
+
+	return cmds - start;
+}
+
+static unsigned int _vbif_unlock(struct adreno_device *adreno_dev,
+				unsigned int *cmds)
+{
+	unsigned int *start = cmds;
+
+	/* MMU-500 VBIF unstall */
+	*cmds++ = cp_packet(adreno_dev, CP_REG_RMW, 3);
+	*cmds++ = A3XX_VBIF_DDR_OUTPUT_RECOVERABLE_HALT_CTRL0;
+	/* AND to unmask the HALT bit */
+	*cmds++ = ~(VBIF_RECOVERABLE_HALT_CTRL);
+	/* OR to reset the HALT bit */
+	*cmds++ = 0;
+
+	/* release all commands since _vbif_lock() with wait_for_me */
+	cmds += cp_wait_for_me(adreno_dev, cmds);
+	return cmds - start;
+}
+
+static unsigned int _cp_smmu_reg(struct adreno_device *adreno_dev,
+				unsigned int *cmds,
+				enum kgsl_iommu_reg_map reg,
+				unsigned int num)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_iommu *iommu = KGSL_IOMMU_PRIV(device);
+	unsigned int *start = cmds;
+	unsigned int offset;
+
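+	/* The AHB address is a byte offset; convert it to a dword offset */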
+	offset = kgsl_mmu_get_reg_ahbaddr(KGSL_MMU(adreno_dev),
+					  KGSL_IOMMU_CONTEXT_USER, reg) >> 2;
+
+	/* Pick the register write packet for the a3xx/a4xx/a5xx families */
+	if (adreno_is_a5xx(adreno_dev) || iommu->version == 1) {
+		*cmds++ = cp_register(adreno_dev, offset, num);
+	} else if (adreno_is_a3xx(adreno_dev)) {
+		*cmds++ = cp_packet(adreno_dev, CP_REG_WR_NO_CTXT, num + 1);
+		*cmds++ = offset;
+	} else if (adreno_is_a4xx(adreno_dev)) {
+		*cmds++ = cp_packet(adreno_dev, CP_WIDE_REG_WRITE, num + 1);
+		*cmds++ = offset;
+	}
+	return cmds - start;
+}
+
+static unsigned int _tlbiall(struct adreno_device *adreno_dev,
+				unsigned int *cmds)
+{
+	unsigned int *start = cmds;
+	unsigned int tlbstatus;
+
+	tlbstatus = kgsl_mmu_get_reg_ahbaddr(KGSL_MMU(adreno_dev),
+			KGSL_IOMMU_CONTEXT_USER,
+			KGSL_IOMMU_CTX_TLBSTATUS) >> 2;
+
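+	/* Invalidate the TLB, issue a TLB sync and poll until it completes */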
+	cmds += _cp_smmu_reg(adreno_dev, cmds, KGSL_IOMMU_CTX_TLBIALL, 1);
+	*cmds++ = 1;
+
+	cmds += _cp_smmu_reg(adreno_dev, cmds, KGSL_IOMMU_CTX_TLBSYNC, 1);
+	*cmds++ = 0;
+
+	cmds += _wait_reg(adreno_dev, cmds, tlbstatus, 0,
+			KGSL_IOMMU_CTX_TLBSTATUS_SACTIVE, 0xF);
+
+	return cmds - start;
+}
+
+
+/**
+ * _adreno_iommu_add_idle_cmds - Add pm4 packets for GPU idle
+ * @adreno_dev: Pointer to the device structure
+ * @cmds: Pointer to memory where idle commands need to be added
+ */
+static inline int _adreno_iommu_add_idle_cmds(struct adreno_device *adreno_dev,
+							unsigned int *cmds)
+{
+	unsigned int *start = cmds;
+
+	cmds += cp_wait_for_idle(adreno_dev, cmds);
+
+	if (adreno_is_a3xx(adreno_dev))
+		cmds += cp_wait_for_me(adreno_dev, cmds);
+
+	return cmds - start;
+}
+
+/*
+ * _invalidate_uche_cpu() - Invalidate UCHE using CPU
+ * @adreno_dev: the device
+ */
+static void _invalidate_uche_cpu(struct adreno_device *adreno_dev)
+{
+	/* Invalidate UCHE using CPU */
+	if (adreno_is_a5xx(adreno_dev))
+		adreno_writereg(adreno_dev,
+			ADRENO_REG_UCHE_INVALIDATE0, 0x12);
+	else if (adreno_is_a4xx(adreno_dev)) {
+		adreno_writereg(adreno_dev,
+			ADRENO_REG_UCHE_INVALIDATE0, 0);
+		adreno_writereg(adreno_dev,
+			ADRENO_REG_UCHE_INVALIDATE1, 0x12);
+	} else if (adreno_is_a3xx(adreno_dev)) {
+		adreno_writereg(adreno_dev,
+			ADRENO_REG_UCHE_INVALIDATE0, 0);
+		adreno_writereg(adreno_dev,
+			ADRENO_REG_UCHE_INVALIDATE1,
+			0x90000000);
+	} else {
+		WARN_ONCE(1, "GPU UCHE invalidate sequence not defined\n");
+	}
+}
+
+/*
+ * _ctx_switch_use_cpu_path() - Decide whether to use cpu path
+ * @adreno_dev: the device
+ * @new_pt: pagetable to switch to
+ * @rb: ringbuffer for ctx switch
+ *
+ * If we are idle and switching to default pagetable it is
+ * preferable to poke the iommu directly rather than using the
+ * GPU command stream.
+ */
+static bool _ctx_switch_use_cpu_path(
+				struct adreno_device *adreno_dev,
+				struct kgsl_pagetable *new_pt,
+				struct adreno_ringbuffer *rb)
+{
+	struct kgsl_mmu *mmu = KGSL_MMU(adreno_dev);
+
+	/*
+	 * If rb is current, we can use cpu path when GPU is
+	 * idle and we are switching to default pt.
+	 * If rb is not current, we can use cpu path when rb has no
+	 * pending commands (rptr = wptr) and we are switching to default pt.
+	 */
+	if (adreno_dev->cur_rb == rb)
+		return adreno_isidle(KGSL_DEVICE(adreno_dev)) &&
+			(new_pt == mmu->defaultpagetable);
+	else if (adreno_rb_empty(rb) &&
+			(new_pt == mmu->defaultpagetable))
+		return true;
+
+	return false;
+}
+
+/**
+ * adreno_iommu_set_apriv() - Generate commands to set/reset the APRIV
+ * @adreno_dev: Device on which the commands will execute
+ * @cmds: The memory pointer where commands are generated
+ * @set: If set then APRIV is set else reset
+ *
+ * Returns the number of commands generated
+ */
+static unsigned int adreno_iommu_set_apriv(struct adreno_device *adreno_dev,
+				unsigned int *cmds, int set)
+{
+	unsigned int *cmds_orig = cmds;
+
+	/* adreno 3xx doesn't have the CP_CNTL.APRIV field */
+	if (adreno_is_a3xx(adreno_dev))
+		return 0;
+
+	cmds += cp_wait_for_idle(adreno_dev, cmds);
+	cmds += cp_wait_for_me(adreno_dev, cmds);
+	*cmds++ = cp_register(adreno_dev, adreno_getreg(adreno_dev,
+				ADRENO_REG_CP_CNTL), 1);
+	if (set)
+		*cmds++ = 1;
+	else
+		*cmds++ = 0;
+
+	return cmds - cmds_orig;
+}
+
+static inline int _adreno_iommu_add_idle_indirect_cmds(
+			struct adreno_device *adreno_dev,
+			unsigned int *cmds, uint64_t nop_gpuaddr)
+{
+	unsigned int *start = cmds;
+	/*
+	 * Adding an indirect buffer ensures that the prefetch stalls until
+	 * the commands in the indirect buffer have completed. We need to
+	 * stall the prefetch with a nop indirect buffer when updating
+	 * pagetables because it provides more stable synchronization.
+	 */
+	cmds += cp_wait_for_me(adreno_dev, cmds);
+	*cmds++ = cp_mem_packet(adreno_dev, CP_INDIRECT_BUFFER_PFE, 2, 1);
+	cmds += cp_gpuaddr(adreno_dev, cmds, nop_gpuaddr);
+	*cmds++ = 2;
+	cmds += cp_wait_for_idle(adreno_dev, cmds);
+	return cmds - start;
+}
+
+/**
+ * _adreno_mmu_set_pt_update_condition() - Generate commands to setup a
+ * flag to indicate whether pt switch is required or not by comparing
+ * current pt id and incoming pt id
+ * @rb: The RB on which the commands will execute
+ * @cmds: The pointer to memory where the commands are placed.
+ * @ptname: Incoming pagetable id to switch to
+ *
+ * Returns number of commands added.
+ */
+static unsigned int _adreno_mmu_set_pt_update_condition(
+			struct adreno_ringbuffer *rb,
+			unsigned int *cmds, unsigned int ptname)
+{
+	struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
+	unsigned int *cmds_orig = cmds;
+	/*
+	 * write 1 to switch pt flag indicating that we need to execute the
+	 * pt switch commands
+	 */
+	*cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 1);
+	cmds += cp_gpuaddr(adreno_dev, cmds, (rb->pagetable_desc.gpuaddr +
+		PT_INFO_OFFSET(switch_pt_enable)));
+	*cmds++ = 1;
+	*cmds++ = cp_packet(adreno_dev, CP_WAIT_MEM_WRITES, 1);
+	*cmds++ = 0;
+	cmds += cp_wait_for_me(adreno_dev, cmds);
+	/*
+	 * Compare the current global ptname against the incoming pt id; if
+	 * they match, the conditional write below clears switch_pt_enable so
+	 * the pagetable switch commands are skipped.
+	 */
+	*cmds++ = cp_mem_packet(adreno_dev, CP_COND_WRITE, 6, 2);
+	/* write to mem space, when a mem space is equal to ref val */
+	*cmds++ = (1 << 8) | (1 << 4) | 3;
+	cmds += cp_gpuaddr(adreno_dev, cmds,
+	   (adreno_dev->ringbuffers[0].pagetable_desc.gpuaddr +
+		PT_INFO_OFFSET(current_global_ptname)));
+	*cmds++ = ptname;
+	*cmds++ = 0xFFFFFFFF;
+	cmds += cp_gpuaddr(adreno_dev, cmds, (rb->pagetable_desc.gpuaddr +
+		PT_INFO_OFFSET(switch_pt_enable)));
+	*cmds++ = 0;
+	*cmds++ = cp_packet(adreno_dev, CP_WAIT_MEM_WRITES, 1);
+	*cmds++ = 0;
+	cmds += cp_wait_for_me(adreno_dev, cmds);
+
+	return cmds - cmds_orig;
+}
+
+/**
+ * _adreno_iommu_pt_update_pid_to_mem() - Add commands to write the pagetable
+ * id to memory.
+ * @rb: The ringbuffer on which these commands will execute
+ * @cmds: Pointer to memory where the commands are copied
+ * @ptname: The pagetable id
+ */
+static unsigned int _adreno_iommu_pt_update_pid_to_mem(
+				struct adreno_ringbuffer *rb,
+				unsigned int *cmds, int ptname)
+{
+	struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
+	unsigned int *cmds_orig = cmds;
+
+	*cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 1);
+	cmds += cp_gpuaddr(adreno_dev, cmds, (rb->pagetable_desc.gpuaddr +
+		PT_INFO_OFFSET(current_rb_ptname)));
+	*cmds++ = ptname;
+	*cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 1);
+	cmds += cp_gpuaddr(adreno_dev, cmds,
+		(adreno_dev->ringbuffers[0].pagetable_desc.gpuaddr +
+		PT_INFO_OFFSET(current_global_ptname)));
+	*cmds++ = ptname;
+	/* pagetable switch done, Housekeeping: set the switch_pt_enable to 0 */
+	*cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 1);
+	cmds += cp_gpuaddr(adreno_dev, cmds, (rb->pagetable_desc.gpuaddr +
+		PT_INFO_OFFSET(switch_pt_enable)));
+	*cmds++ = 0;
+	*cmds++ = cp_packet(adreno_dev, CP_WAIT_MEM_WRITES, 1);
+	*cmds++ = 0;
+	cmds += cp_wait_for_me(adreno_dev, cmds);
+
+	return cmds - cmds_orig;
+}
+
+static unsigned int _adreno_iommu_set_pt_v1(struct adreno_ringbuffer *rb,
+					unsigned int *cmds_orig,
+					u64 ttbr0, u32 contextidr, u32 ptname)
+{
+	struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
+	unsigned int *cmds = cmds_orig;
+	unsigned int *cond_exec_ptr;
+
+	cmds += _adreno_iommu_add_idle_cmds(adreno_dev, cmds);
+
+	/* set flag that indicates whether pt switch is required*/
+	cmds += _adreno_mmu_set_pt_update_condition(rb, cmds, ptname);
+	*cmds++ = cp_mem_packet(adreno_dev, CP_COND_EXEC, 4, 2);
+	cmds += cp_gpuaddr(adreno_dev, cmds, (rb->pagetable_desc.gpuaddr +
+		PT_INFO_OFFSET(switch_pt_enable)));
+	cmds += cp_gpuaddr(adreno_dev, cmds, (rb->pagetable_desc.gpuaddr +
+		PT_INFO_OFFSET(switch_pt_enable)));
+	*cmds++ = 1;
+	/* Exec count to be filled later */
+	cond_exec_ptr = cmds;
+	cmds++;
+
+	cmds += cp_wait_for_idle(adreno_dev, cmds);
+
+	cmds += _iommu_lock(adreno_dev, cmds);
+
+	cmds += _cp_smmu_reg(adreno_dev, cmds, KGSL_IOMMU_CTX_TTBR0, 2);
+	*cmds++ = lower_32_bits(ttbr0);
+	*cmds++ = upper_32_bits(ttbr0);
+	cmds += _cp_smmu_reg(adreno_dev, cmds,
+			KGSL_IOMMU_CTX_CONTEXTIDR, 1);
+	*cmds++ = contextidr;
+
+	/* a3xx doesn't have MEQ space to hold the TLBI commands */
+	if (adreno_is_a3xx(adreno_dev))
+		cmds += _iommu_unlock(adreno_dev, cmds);
+
+	cmds += _tlbiall(adreno_dev, cmds);
+
+	/* unlock or wait for me to finish the TLBI */
+	if (!adreno_is_a3xx(adreno_dev))
+		cmds += _iommu_unlock(adreno_dev, cmds);
+	else
+		cmds += cp_wait_for_me(adreno_dev, cmds);
+
+	/* Exec count ordinal of CP_COND_EXEC packet */
+	*cond_exec_ptr = (cmds - cond_exec_ptr - 1);
+	cmds += _adreno_iommu_add_idle_cmds(adreno_dev, cmds);
+	cmds += _adreno_iommu_pt_update_pid_to_mem(rb, cmds, ptname);
+
+	return cmds - cmds_orig;
+}
+
+
+static unsigned int _adreno_iommu_set_pt_v2_a3xx(struct kgsl_device *device,
+					unsigned int *cmds_orig,
+					u64 ttbr0, u32 contextidr)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	unsigned int *cmds = cmds_orig;
+
+	cmds += _adreno_iommu_add_idle_cmds(adreno_dev, cmds);
+
+	cmds += _vbif_lock(adreno_dev, cmds);
+
+	cmds += _cp_smmu_reg(adreno_dev, cmds, KGSL_IOMMU_CTX_TTBR0, 2);
+	*cmds++ = lower_32_bits(ttbr0);
+	*cmds++ = upper_32_bits(ttbr0);
+	cmds += _cp_smmu_reg(adreno_dev, cmds, KGSL_IOMMU_CTX_CONTEXTIDR, 1);
+	*cmds++ = contextidr;
+
+	cmds += _vbif_unlock(adreno_dev, cmds);
+
+	cmds += _tlbiall(adreno_dev, cmds);
+
+	/* wait for me to finish the TLBI */
+	cmds += cp_wait_for_me(adreno_dev, cmds);
+
+	cmds += _adreno_iommu_add_idle_cmds(adreno_dev, cmds);
+
+	return cmds - cmds_orig;
+}
+
+static unsigned int _adreno_iommu_set_pt_v2_a4xx(struct kgsl_device *device,
+					unsigned int *cmds_orig,
+					u64 ttbr0, u32 contextidr)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	unsigned int *cmds = cmds_orig;
+
+	cmds += _adreno_iommu_add_idle_cmds(adreno_dev, cmds);
+
+	cmds += _vbif_lock(adreno_dev, cmds);
+
+	cmds += _cp_smmu_reg(adreno_dev, cmds, KGSL_IOMMU_CTX_TTBR0, 2);
+	*cmds++ = lower_32_bits(ttbr0);
+	*cmds++ = upper_32_bits(ttbr0);
+	cmds += _cp_smmu_reg(adreno_dev, cmds, KGSL_IOMMU_CTX_CONTEXTIDR, 1);
+	*cmds++ = contextidr;
+
+	cmds += _vbif_unlock(adreno_dev, cmds);
+
+	cmds += _tlbiall(adreno_dev, cmds);
+
+	/* wait for me to finish the TLBI */
+	cmds += cp_wait_for_me(adreno_dev, cmds);
+
+	cmds += _adreno_iommu_add_idle_cmds(adreno_dev, cmds);
+
+	return cmds - cmds_orig;
+}
+
+static unsigned int _adreno_iommu_set_pt_v2_a5xx(struct kgsl_device *device,
+					unsigned int *cmds_orig,
+					u64 ttbr0, u32 contextidr,
+					struct adreno_ringbuffer *rb)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	unsigned int *cmds = cmds_orig;
+
+	cmds += _adreno_iommu_add_idle_cmds(adreno_dev, cmds);
+	cmds += cp_wait_for_me(adreno_dev, cmds);
+
+	/* CP switches the pagetable and flushes the Caches */
+	*cmds++ = cp_packet(adreno_dev, CP_SMMU_TABLE_UPDATE, 3);
+	*cmds++ = lower_32_bits(ttbr0);
+	*cmds++ = upper_32_bits(ttbr0);
+	*cmds++ = contextidr;
+
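+	/* Record the new TTBR0 and CONTEXTIDR in the RB's pagetable descriptor */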
+	*cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 4, 1);
+	cmds += cp_gpuaddr(adreno_dev, cmds, (rb->pagetable_desc.gpuaddr +
+		PT_INFO_OFFSET(ttbr0)));
+	*cmds++ = lower_32_bits(ttbr0);
+	*cmds++ = upper_32_bits(ttbr0);
+	*cmds++ = contextidr;
+
+	/* release all commands with wait_for_me */
+	cmds += cp_wait_for_me(adreno_dev, cmds);
+
+	cmds += _adreno_iommu_add_idle_cmds(adreno_dev, cmds);
+
+	return cmds - cmds_orig;
+}
+
+/**
+ * adreno_iommu_set_pt_generate_cmds() - Generate commands to change pagetable
+ * @rb: The RB in which these commands are to be submitted
+ * @cmds: The pointer where the commands are placed
+ * @pt: The pagetable to switch to
+ */
+unsigned int adreno_iommu_set_pt_generate_cmds(
+					struct adreno_ringbuffer *rb,
+					unsigned int *cmds,
+					struct kgsl_pagetable *pt)
+{
+	struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_iommu *iommu = KGSL_IOMMU_PRIV(device);
+	u64 ttbr0;
+	u32 contextidr;
+	unsigned int *cmds_orig = cmds;
+
+	ttbr0 = kgsl_mmu_pagetable_get_ttbr0(pt);
+	contextidr = kgsl_mmu_pagetable_get_contextidr(pt);
+
+	cmds += adreno_iommu_set_apriv(adreno_dev, cmds, 1);
+
+	cmds += _adreno_iommu_add_idle_indirect_cmds(adreno_dev, cmds,
+		iommu->setstate.gpuaddr + KGSL_IOMMU_SETSTATE_NOP_OFFSET);
+
+	if (iommu->version >= 2) {
+		if (adreno_is_a5xx(adreno_dev))
+			cmds += _adreno_iommu_set_pt_v2_a5xx(device, cmds,
+						ttbr0, contextidr, rb);
+		else if (adreno_is_a4xx(adreno_dev))
+			cmds += _adreno_iommu_set_pt_v2_a4xx(device, cmds,
+						ttbr0, contextidr);
+		else if (adreno_is_a3xx(adreno_dev))
+			cmds += _adreno_iommu_set_pt_v2_a3xx(device, cmds,
+						ttbr0, contextidr);
+		else
+			WARN_ONCE(1,
+			"GPU IOMMU set pagetable sequence not defined\n");
+	} else {
+		cmds += _adreno_iommu_set_pt_v1(rb, cmds, ttbr0, contextidr,
+						pt->name);
+	}
+
+	/* invalidate all base pointers */
+	cmds += cp_invalidate_state(adreno_dev, cmds);
+
+	cmds += adreno_iommu_set_apriv(adreno_dev, cmds, 0);
+
+	return cmds - cmds_orig;
+}
+
+/**
+ * __add_curr_ctxt_cmds() - Add commands to set a context id in memstore
+ * @rb: The RB in which the commands will be added for execution
+ * @cmds: Pointer to memory where commands are added
+ * @drawctxt: The context whose id is being set in memstore
+ *
+ * Returns the number of dwords
+ */
+static unsigned int __add_curr_ctxt_cmds(struct adreno_ringbuffer *rb,
+			unsigned int *cmds,
+			struct adreno_context *drawctxt)
+{
+	struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int *cmds_orig = cmds;
+
+	/* write the context identifier to memstore memory */
+	*cmds++ = cp_packet(adreno_dev, CP_NOP, 1);
+	*cmds++ = KGSL_CONTEXT_TO_MEM_IDENTIFIER;
+
+	*cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 1);
+	cmds += cp_gpuaddr(adreno_dev, cmds,
+			MEMSTORE_RB_GPU_ADDR(device, rb, current_context));
+	*cmds++ = (drawctxt ? drawctxt->base.id : 0);
+
+	*cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 1);
+	cmds += cp_gpuaddr(adreno_dev, cmds,
+			MEMSTORE_ID_GPU_ADDR(device,
+				KGSL_MEMSTORE_GLOBAL, current_context));
+	*cmds++ = (drawctxt ? drawctxt->base.id : 0);
+
+	/* Invalidate UCHE for new context */
+	if (adreno_is_a5xx(adreno_dev)) {
+		*cmds++ = cp_register(adreno_dev,
+			adreno_getreg(adreno_dev,
+		ADRENO_REG_UCHE_INVALIDATE0), 1);
+		*cmds++ = 0x12;
+	} else if (adreno_is_a4xx(adreno_dev)) {
+		*cmds++ = cp_register(adreno_dev,
+			adreno_getreg(adreno_dev,
+			ADRENO_REG_UCHE_INVALIDATE0), 2);
+		*cmds++ = 0;
+		*cmds++ = 0x12;
+	} else if (adreno_is_a3xx(adreno_dev)) {
+		*cmds++ = cp_register(adreno_dev,
+			adreno_getreg(adreno_dev,
+			ADRENO_REG_UCHE_INVALIDATE0), 2);
+		*cmds++ = 0;
+		*cmds++ = 0x90000000;
+	} else
+		WARN_ONCE(1, "GPU UCHE invalidate sequence not defined\n");
+
+	return cmds - cmds_orig;
+}
+
+/*
+ * _set_ctxt_cpu() - Set the current context in memstore
+ * @rb: The ringbuffer whose memstore is updated with the current context
+ * @drawctxt: The context whose id is being set in memstore
+ */
+static void _set_ctxt_cpu(struct adreno_ringbuffer *rb,
+			struct adreno_context *drawctxt)
+{
+	struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (rb == adreno_dev->cur_rb) {
+		_invalidate_uche_cpu(adreno_dev);
+		/* Update global memstore with current context */
+		kgsl_sharedmem_writel(device, &device->memstore,
+			KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL,
+						current_context),
+			drawctxt ? drawctxt->base.id : 0);
+	}
+	/* Update rb memstore with current context */
+	kgsl_sharedmem_writel(device, &device->memstore,
+		MEMSTORE_RB_OFFSET(rb, current_context),
+		drawctxt ? drawctxt->base.id : 0);
+}
+
+/**
+ * _set_ctxt_gpu() - Add commands to set the current context in memstore
+ * @rb: The ringbuffer in which commands to set memstore are added
+ * @drawctxt: The context whose id is being set in memstore
+ */
+static int _set_ctxt_gpu(struct adreno_ringbuffer *rb,
+			struct adreno_context *drawctxt)
+{
+	unsigned int link[15], *cmds;
+	int result;
+
+	cmds = &link[0];
+	cmds += __add_curr_ctxt_cmds(rb, cmds, drawctxt);
+	result = adreno_ringbuffer_issuecmds(rb, 0, link,
+			(unsigned int)(cmds - link));
+	return result;
+}
+
+/**
+ * _set_pagetable_cpu() - Use CPU to switch the pagetable
+ * @rb: The rb for which pagetable needs to be switched
+ * @new_pt: The pagetable to switch to
+ */
+static int _set_pagetable_cpu(struct adreno_ringbuffer *rb,
+			struct kgsl_pagetable *new_pt)
+{
+	struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int result;
+
+	/* update TTBR0 only if we are updating current RB */
+	if (adreno_dev->cur_rb == rb) {
+		result = kgsl_mmu_set_pt(&device->mmu, new_pt);
+		if (result)
+			return result;
+		/* write the new pt set to memory var */
+		adreno_ringbuffer_set_global(adreno_dev, new_pt->name);
+	}
+
+	/* Update the RB pagetable info here */
+	adreno_ringbuffer_set_pagetable(rb, new_pt);
+
+	return 0;
+}
+
+/**
+ * _set_pagetable_gpu() - Use GPU to switch the pagetable
+ * @rb: The rb in which commands to switch pagetable are to be
+ *    submitted
+ * @new_pt: The pagetable to switch to
+ */
+static int _set_pagetable_gpu(struct adreno_ringbuffer *rb,
+			struct kgsl_pagetable *new_pt)
+{
+	struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
+	unsigned int *link = NULL, *cmds;
+	int result;
+
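+	/* Scratch buffer for the generated pagetable switch commands */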
+	link = kmalloc(PAGE_SIZE, GFP_KERNEL);
+	if (link == NULL)
+		return -ENOMEM;
+
+	cmds = link;
+
+	/* If we are in a fault the MMU will be reset soon */
+	if (test_bit(ADRENO_DEVICE_FAULT, &adreno_dev->priv)) {
+		kfree(link);
+		return 0;
+	}
+
+	cmds += adreno_iommu_set_pt_generate_cmds(rb, cmds, new_pt);
+
+	if ((unsigned int) (cmds - link) > (PAGE_SIZE / sizeof(unsigned int))) {
+		KGSL_DRV_ERR(KGSL_DEVICE(adreno_dev),
+			"Temp command buffer overflow\n");
+
+		/*
+		 * Temp buffer not large enough for pagetable switch commands.
+		 * Increase the size allocated above.
+		 */
+		BUG();
+	}
+	/*
+	 * This returns the per-context timestamp but we need to
+	 * use the global timestamp for IOMMU clock disablement
+	 */
+	result = adreno_ringbuffer_issuecmds(rb,
+			KGSL_CMD_FLAGS_PMODE, link,
+			(unsigned int)(cmds - link));
+
+	kfree(link);
+	return result;
+}
+
+/**
+ * adreno_iommu_init() - Adreno iommu init
+ * @adreno_dev: Adreno device
+ */
+int adreno_iommu_init(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_iommu *iommu = KGSL_IOMMU_PRIV(device);
+
+	if (kgsl_mmu_get_mmutype(device) == KGSL_MMU_TYPE_NONE)
+		return 0;
+
+	/*
+	 * A nop is required in an indirect buffer when switching
+	 * pagetables in-stream
+	 */
+
+	kgsl_sharedmem_writel(device, &iommu->setstate,
+				KGSL_IOMMU_SETSTATE_NOP_OFFSET,
+				cp_packet(adreno_dev, CP_NOP, 1));
+
+	/* set iommu features here */
+	if (adreno_is_a420(adreno_dev))
+		device->mmu.features |= KGSL_MMU_FLUSH_TLB_ON_MAP;
+
+	/*
+	 * A5XX: per-process pagetables are supported starting with PFP
+	 * version 0x5FF064 and PM4 (ME) version 0x5FF052
+	 */
+	if (adreno_is_a5xx(adreno_dev) &&
+		!MMU_FEATURE(&device->mmu, KGSL_MMU_GLOBAL_PAGETABLE)) {
+		if ((adreno_compare_pfp_version(adreno_dev,
+				A5XX_PFP_PER_PROCESS_UCODE_VER) < 0) ||
+		    (adreno_compare_pm4_version(adreno_dev,
+				A5XX_PM4_PER_PROCESS_UCODE_VER) < 0)) {
+			KGSL_DRV_ERR(device,
+				"Invalid ucode for per process pagetables\n");
+			return -ENODEV;
+		}
+	}
+
+	/* Enable guard page MMU feature for A3xx and A4xx targets only */
+	if (adreno_is_a3xx(adreno_dev) || adreno_is_a4xx(adreno_dev))
+		device->mmu.features |= KGSL_MMU_NEED_GUARD_PAGE;
+
+	return 0;
+}
+
+/**
+ * adreno_iommu_set_pt_ctx() - Change the pagetable of the current RB
+ * @rb: The RB on which the pagetable is to be changed
+ * @new_pt: The new pagetable the device will change to
+ * @drawctxt: The context whose pagetable the ringbuffer is switching to,
+ *	NULL means KGSL_CONTEXT_GLOBAL
+ * @flags: Switch control flags (ADRENO_CONTEXT_SWITCH_*)
+ *
+ * Returns 0 on success else error code.
+ */
+int adreno_iommu_set_pt_ctx(struct adreno_ringbuffer *rb,
+			struct kgsl_pagetable *new_pt,
+			struct adreno_context *drawctxt,
+			unsigned long flags)
+{
+	struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct kgsl_pagetable *cur_pt = device->mmu.defaultpagetable;
+	int result = 0;
+	int cpu_path = 0;
+
+	/* Just do the context switch incase of NOMMU */
+	if (kgsl_mmu_get_mmutype(device) == KGSL_MMU_TYPE_NONE) {
+		if ((!(flags & ADRENO_CONTEXT_SWITCH_FORCE_GPU)) &&
+			adreno_isidle(device))
+			_set_ctxt_cpu(rb, drawctxt);
+		else
+			result = _set_ctxt_gpu(rb, drawctxt);
+
+		return result;
+	}
+
+	if (rb->drawctxt_active)
+		cur_pt = rb->drawctxt_active->base.proc_priv->pagetable;
+
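+	/* Use the CPU path only when not forced to the GPU and it is safe */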
+	cpu_path = !(flags & ADRENO_CONTEXT_SWITCH_FORCE_GPU) &&
+		_ctx_switch_use_cpu_path(adreno_dev, new_pt, rb);
+
+	/* Pagetable switch */
+	if (new_pt != cur_pt) {
+		if (cpu_path)
+			result = _set_pagetable_cpu(rb, new_pt);
+		else
+			result = _set_pagetable_gpu(rb, new_pt);
+	}
+
+	if (result)
+		return result;
+
+	/* Context switch */
+	if (cpu_path)
+		_set_ctxt_cpu(rb, drawctxt);
+	else
+		result = _set_ctxt_gpu(rb, drawctxt);
+
+	return result;
+}
diff --git a/drivers/gpu/msm/adreno_iommu.h b/drivers/gpu/msm/adreno_iommu.h
new file mode 100644
index 0000000..5a6c2c5
--- /dev/null
+++ b/drivers/gpu/msm/adreno_iommu.h
@@ -0,0 +1,52 @@
+/* Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __ADRENO_IOMMU_H
+#define __ADRENO_IOMMU_H
+
+#ifdef CONFIG_QCOM_KGSL_IOMMU
+int adreno_iommu_set_pt_ctx(struct adreno_ringbuffer *rb,
+			struct kgsl_pagetable *new_pt,
+			struct adreno_context *drawctxt,
+			unsigned long flags);
+
+int adreno_iommu_init(struct adreno_device *adreno_dev);
+
+unsigned int adreno_iommu_set_pt_generate_cmds(
+				struct adreno_ringbuffer *rb,
+				unsigned int *cmds,
+				struct kgsl_pagetable *pt);
+#else
+static inline int adreno_iommu_init(struct adreno_device *adreno_dev)
+{
+	return 0;
+}
+
+static inline int adreno_iommu_set_pt_ctx(struct adreno_ringbuffer *rb,
+			struct kgsl_pagetable *new_pt,
+			struct adreno_context *drawctxt,
+			unsigned long flags)
+{
+	return 0;
+}
+
+static inline unsigned int adreno_iommu_set_pt_generate_cmds(
+				struct adreno_ringbuffer *rb,
+				unsigned int *cmds,
+				struct kgsl_pagetable *pt)
+{
+	return 0;
+}
+
+#endif /* CONFIG_QCOM_KGSL_IOMMU */
+#endif /* __ADRENO_IOMMU_H */
diff --git a/drivers/gpu/msm/adreno_perfcounter.c b/drivers/gpu/msm/adreno_perfcounter.c
new file mode 100644
index 0000000..c81ea69
--- /dev/null
+++ b/drivers/gpu/msm/adreno_perfcounter.c
@@ -0,0 +1,1014 @@
+/* Copyright (c) 2002,2007-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/module.h>
+#include <linux/uaccess.h>
+
+#include "kgsl.h"
+#include "adreno.h"
+#include "adreno_perfcounter.h"
+#include "adreno_pm4types.h"
+#include "a5xx_reg.h"
+
+/* Bit flag for RBBM_PERFCTR_CTL */
+#define RBBM_PERFCTR_CTL_ENABLE		0x00000001
+
+#define VBIF2_PERF_CNT_SEL_MASK 0x7F
+/* offset of clear register from select register */
+#define VBIF2_PERF_CLR_REG_SEL_OFF 8
+/* offset of enable register from select register */
+#define VBIF2_PERF_EN_REG_SEL_OFF 16
+
+/* offset of clear register from the enable register */
+#define VBIF2_PERF_PWR_CLR_REG_EN_OFF 8
+
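+/* Combine a hi/lo register pair into a 64-bit value and add an offset */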
+#define REG_64BIT_VAL(hi, lo, val) (((((uint64_t) hi) << 32) | lo) + val)
+/*
+ * Return true if the countable is used and not broken
+ */
+static inline int active_countable(unsigned int countable)
+{
+	return ((countable != KGSL_PERFCOUNTER_NOT_USED) &&
+		(countable != KGSL_PERFCOUNTER_BROKEN));
+}
+
+/**
+ * adreno_perfcounter_init: Reserve kernel performance counters
+ * @adreno_dev: Pointer to an adreno_device struct
+ *
+ * The kernel needs/wants a certain group of performance counters for
+ * its own activities.  Reserve these performance counters at init time
+ * to ensure that they are always reserved for the kernel.  The performance
+ * counters used by the kernel can be obtained by the user, but these
+ * performance counters will remain active as long as the device is alive.
+ */
+void adreno_perfcounter_init(struct adreno_device *adreno_dev)
+{
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+
+	if (gpudev->perfcounter_init)
+		gpudev->perfcounter_init(adreno_dev);
+}
+
+/**
+ * adreno_perfcounter_write() - Write the physical performance
+ * counter values.
+ * @adreno_dev: Adreno device whose registers are to be written to.
+ * @reg: The physical counter register whose saved value is to be restored.
+ *
+ * This function loads the 64 bit saved value into the particular physical
+ * counter by enabling the corresponding bit in A3XX_RBBM_PERFCTR_LOAD_CMD*
+ * register.
+ */
+static void adreno_perfcounter_write(struct adreno_device *adreno_dev,
+		struct adreno_perfcount_register *reg)
+{
+	unsigned int val, i;
+	int cmd[] = { ADRENO_REG_RBBM_PERFCTR_LOAD_CMD0,
+		ADRENO_REG_RBBM_PERFCTR_LOAD_CMD1,
+		ADRENO_REG_RBBM_PERFCTR_LOAD_CMD2,
+		ADRENO_REG_RBBM_PERFCTR_LOAD_CMD3 };
+
+	/* If not loadable then return quickly */
+	if (reg->load_bit < 0)
+		return;
+
+	/* Get the offset/cmd for loading */
+	i = reg->load_bit / 32;
+
+	/* Get the register bit offset for loading */
+	val = BIT(reg->load_bit & 31);
+
+	/* Write the saved value to PERFCTR_LOAD_VALUE* registers. */
+	adreno_writereg64(adreno_dev, ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_LO,
+			  ADRENO_REG_RBBM_PERFCTR_LOAD_VALUE_HI, reg->value);
+
+	/*
+	 * Set the load bit in PERFCTR_LOAD_CMD for the physical counter
+	 * we want to restore. The value in PERFCTR_LOAD_VALUE* is loaded
+	 * into the corresponding physical counter. The value for the select
+	 * register gets cleared once RBBM reads it so no need to clear the
+	 * select register afterwards.
+	 */
+	adreno_writereg(adreno_dev, cmd[i], val);
+}
+
+/**
+ * adreno_perfcounter_close() - Release counters initialized by
+ * adreno_perfcounter_init()
+ * @adreno_dev: Pointer to an adreno_device struct
+ */
+void adreno_perfcounter_close(struct adreno_device *adreno_dev)
+{
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+
+	if (gpudev->perfcounter_close)
+		gpudev->perfcounter_close(adreno_dev);
+}
+
+/**
+ * adreno_perfcounter_restore() - Restore performance counters
+ * @adreno_dev: adreno device to configure
+ *
+ * Load the physical performance counters with 64 bit value which are
+ * saved on GPU power collapse.
+ */
+void adreno_perfcounter_restore(struct adreno_device *adreno_dev)
+{
+	struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev);
+	struct adreno_perfcount_group *group;
+	unsigned int counter, groupid;
+
+	if (counters == NULL)
+		return;
+
+	for (groupid = 0; groupid < counters->group_count; groupid++) {
+		group = &(counters->groups[groupid]);
+
+		/* Restore the counters for the group */
+		for (counter = 0; counter < group->reg_count; counter++) {
+			/* If not active or broken, skip this counter */
+			if (!active_countable(group->regs[counter].countable))
+				continue;
+
+			adreno_perfcounter_write(adreno_dev,
+					&group->regs[counter]);
+		}
+	}
+}
+
+/**
+ * adreno_perfcounter_save() - Save performance counters
+ * @adreno_dev: adreno device to configure
+ *
+ * Save the performance counter values before GPU power collapse.
+ * The saved values are restored on restart.
+ * This ensures physical counters are coherent across power-collapse.
+ */
+inline void adreno_perfcounter_save(struct adreno_device *adreno_dev)
+{
+	struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev);
+	struct adreno_perfcount_group *group;
+	unsigned int counter, groupid;
+
+	if (counters == NULL)
+		return;
+
+	for (groupid = 0; groupid < counters->group_count; groupid++) {
+		group = &(counters->groups[groupid]);
+
+		/* Save the counter values for the group */
+		for (counter = 0; counter < group->reg_count; counter++) {
+			/* If not active or broken, skip this counter */
+			if (!active_countable(group->regs[counter].countable))
+				continue;
+
+			/*
+			 * Loadable counters are reloaded into hardware on
+			 * restore, so just overwrite the saved value;
+			 * non-loadable counters accumulate across collapses.
+			 */
+			if (group->regs[counter].load_bit >= 0)
+				group->regs[counter].value = 0;
+
+			group->regs[counter].value =
+				group->regs[counter].value +
+				adreno_perfcounter_read(adreno_dev, groupid,
+								counter);
+		}
+	}
+}
+
+static int adreno_perfcounter_enable(struct adreno_device *adreno_dev,
+	unsigned int group, unsigned int counter, unsigned int countable);
+
+/**
+ * adreno_perfcounter_start: Enable performance counters
+ * @adreno_dev: Adreno device to configure
+ *
+ * Ensure that all allocated performance counters are enabled.  Since
+ * the device was most likely stopped, we can't trust that the counters
+ * are still valid so make it so.
+ */
+
+void adreno_perfcounter_start(struct adreno_device *adreno_dev)
+{
+	struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev);
+	struct adreno_perfcount_group *group;
+	unsigned int i, j;
+
+	if (counters == NULL)
+		return;
+	/* group id iter */
+	for (i = 0; i < counters->group_count; i++) {
+		group = &(counters->groups[i]);
+
+		/* countable iter */
+		for (j = 0; j < group->reg_count; j++) {
+			if (!active_countable(group->regs[j].countable))
+				continue;
+
+			/*
+			 * The GPU has to be idle before calling the perfcounter
+			 * enable function, but since this function is called
+			 * during start we already know the GPU is idle.
+			 * Since the countable/counter pairs have already been
+			 * validated, there is no way for _enable() to fail so
+			 * no need to check the return code.
+			 */
+			adreno_perfcounter_enable(adreno_dev, i, j,
+					  group->regs[j].countable);
+		}
+	}
+}
+
+/**
+ * adreno_perfcounter_read_group() - Read a list of performance counters
+ * @adreno_dev: Adreno device to configure
+ * @reads: List of kgsl_perfcounter_read_groups
+ * @count: Length of list
+ *
+ * Read the performance counters for the groupid/countable pairs and return
+ * the 64 bit result for each pair
+ */
+
+int adreno_perfcounter_read_group(struct adreno_device *adreno_dev,
+	struct kgsl_perfcounter_read_group __user *reads, unsigned int count)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev);
+	struct adreno_perfcount_group *group;
+	struct kgsl_perfcounter_read_group *list = NULL;
+	unsigned int i, j;
+	int ret = 0;
+
+	if (counters == NULL)
+		return -EINVAL;
+
+	/* sanity check params passed in */
+	if (reads == NULL || count == 0 || count > 100)
+		return -EINVAL;
+
+	list = kmalloc_array(count, sizeof(struct kgsl_perfcounter_read_group),
+			GFP_KERNEL);
+	if (!list)
+		return -ENOMEM;
+
+	if (copy_from_user(list, reads,
+			sizeof(struct kgsl_perfcounter_read_group) * count)) {
+		ret = -EFAULT;
+		goto done;
+	}
+
+	mutex_lock(&device->mutex);
+	ret = kgsl_active_count_get(device);
+	if (ret) {
+		mutex_unlock(&device->mutex);
+		goto done;
+	}
+
+	/* list iterator */
+	for (j = 0; j < count; j++) {
+
+		list[j].value = 0;
+
+		/* Verify that the group ID is within range */
+		if (list[j].groupid >= counters->group_count) {
+			ret = -EINVAL;
+			break;
+		}
+
+		group = &(counters->groups[list[j].groupid]);
+
+		/* group/counter iterator */
+		for (i = 0; i < group->reg_count; i++) {
+			if (group->regs[i].countable == list[j].countable) {
+				list[j].value = adreno_perfcounter_read(
+					adreno_dev, list[j].groupid, i);
+				break;
+			}
+		}
+	}
+
+	kgsl_active_count_put(device);
+	mutex_unlock(&device->mutex);
+
+	/* write the data */
+	if (ret == 0)
+		if (copy_to_user(reads, list,
+			sizeof(struct kgsl_perfcounter_read_group) * count))
+			ret = -EFAULT;
+
+done:
+	kfree(list);
+	return ret;
+}
+
+/**
+ * adreno_perfcounter_get_groupid() - Get the performance counter group ID
+ * @adreno_dev: Adreno device
+ * @name: Performance counter group name string
+ *
+ * Get the groupid based on the name and return this ID
+ */
+
+int adreno_perfcounter_get_groupid(struct adreno_device *adreno_dev,
+					const char *name)
+{
+	struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev);
+	struct adreno_perfcount_group *group;
+	int i;
+
+	if (name == NULL || counters == NULL)
+		return -EINVAL;
+
+	for (i = 0; i < counters->group_count; ++i) {
+		group = &(counters->groups[i]);
+
+		/* make sure there is a name for this group */
+		if (group->name == NULL)
+			continue;
+
+		/* verify name and length */
+		if (strlen(name) == strlen(group->name) &&
+			strcmp(group->name, name) == 0)
+			return i;
+	}
+
+	return -EINVAL;
+}
+
+/**
+ * adreno_perfcounter_get_name() - Get the group name
+ * @adreno_dev: Adreno device
+ * @groupid: Desired performance counter groupid
+ *
+ * Get the name based on the groupid and return it
+ */
+
+const char *adreno_perfcounter_get_name(struct adreno_device *adreno_dev,
+		unsigned int groupid)
+{
+	struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev);
+
+	if (counters != NULL && groupid < counters->group_count)
+		return counters->groups[groupid].name;
+
+	return NULL;
+}
+
+/**
+ * adreno_perfcounter_query_group: Determine which countables are in counters
+ * @adreno_dev: Adreno device to configure
+ * @groupid: Desired performance counter group
+ * @countables: Return list of all countables in the group's counters
+ * @count: Max length of the array
+ * @max_counters: max counters for the groupid
+ *
+ * Query the current state of counters for the group.
+ */
+
+int adreno_perfcounter_query_group(struct adreno_device *adreno_dev,
+	unsigned int groupid, unsigned int __user *countables,
+	unsigned int count, unsigned int *max_counters)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev);
+	struct adreno_perfcount_group *group;
+	unsigned int i, t;
+	int ret = 0;
+	unsigned int *buf;
+
+	*max_counters = 0;
+
+	if (counters == NULL || groupid >= counters->group_count)
+		return -EINVAL;
+
+	mutex_lock(&device->mutex);
+
+	group = &(counters->groups[groupid]);
+	*max_counters = group->reg_count;
+
+	/*
+	 * If countables is NULL or count is zero, return the max reg_count
+	 * in *max_counters and return success
+	 */
+	if (countables == NULL || count == 0) {
+		mutex_unlock(&device->mutex);
+		return 0;
+	}
+
+	t = min_t(unsigned int, group->reg_count, count);
+
+	buf = kmalloc_array(t, sizeof(unsigned int), GFP_KERNEL);
+	if (buf == NULL) {
+		mutex_unlock(&device->mutex);
+		return -ENOMEM;
+	}
+
+	for (i = 0; i < t; i++)
+		buf[i] = group->regs[i].countable;
+
+	mutex_unlock(&device->mutex);
+
+	if (copy_to_user(countables, buf, sizeof(unsigned int) * t))
+		ret = -EFAULT;
+
+	kfree(buf);
+
+	return ret;
+}
+
+static inline void refcount_group(struct adreno_perfcount_group *group,
+	unsigned int reg, unsigned int flags,
+	unsigned int *lo, unsigned int *hi)
+{
+	if (flags & PERFCOUNTER_FLAG_KERNEL)
+		group->regs[reg].kernelcount++;
+	else
+		group->regs[reg].usercount++;
+
+	if (lo)
+		*lo = group->regs[reg].offset;
+
+	if (hi)
+		*hi = group->regs[reg].offset_hi;
+}
+
+/**
+ * adreno_perfcounter_get: Try to put a countable in an available counter
+ * @adreno_dev: Adreno device to configure
+ * @groupid: Desired performance counter group
+ * @countable: Countable desired to be in a counter
+ * @offset: Return offset of the LO counter assigned
+ * @offset_hi: Return offset of the HI counter assigned
+ * @flags: Used to setup kernel perf counters
+ *
+ * Try to place a countable in an available counter.  If the countable is
+ * already in a counter, reference count the counter/countable pair resource
+ * and return success
+ */
+
+int adreno_perfcounter_get(struct adreno_device *adreno_dev,
+	unsigned int groupid, unsigned int countable, unsigned int *offset,
+	unsigned int *offset_hi, unsigned int flags)
+{
+	struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev);
+	struct adreno_perfcount_group *group;
+	unsigned int empty = -1;
+	int ret = 0;
+
+	/* always clear return variables */
+	if (offset)
+		*offset = 0;
+	if (offset_hi)
+		*offset_hi = 0;
+
+	if (counters == NULL)
+		return -EINVAL;
+
+	if (groupid >= counters->group_count)
+		return -EINVAL;
+
+	group = &(counters->groups[groupid]);
+
+	if (group->flags & ADRENO_PERFCOUNTER_GROUP_FIXED) {
+		/*
+		 * In fixed groups the countable equals the fixed register the
+		 * user wants. First make sure it is in range
+		 */
+
+		if (countable >= group->reg_count)
+			return -EINVAL;
+
+		/* If it is already reserved, just increase the refcounts */
+		if ((group->regs[countable].kernelcount != 0) ||
+			(group->regs[countable].usercount != 0)) {
+			refcount_group(group, countable, flags,
+				offset, offset_hi);
+			return 0;
+		}
+
+		empty = countable;
+	} else {
+		unsigned int i;
+
+		/*
+		 * Check if the countable is already associated with a counter.
+		 * Refcount and return the offset, otherwise, try and find an
+		 * empty counter and assign the countable to it.
+		 */
+
+		for (i = 0; i < group->reg_count; i++) {
+			if (group->regs[i].countable == countable) {
+				refcount_group(group, i, flags,
+					offset, offset_hi);
+				return 0;
+			} else if (group->regs[i].countable ==
+			KGSL_PERFCOUNTER_NOT_USED) {
+				/* keep track of unused counter */
+				empty = i;
+			}
+		}
+	}
+
+	/* no available counters, so do nothing else */
+	if (empty == -1)
+		return -EBUSY;
+
+	/* initialize the new counter */
+	group->regs[empty].countable = countable;
+
+	/* enable the new counter */
+	ret = adreno_perfcounter_enable(adreno_dev, groupid, empty, countable);
+	if (ret) {
+		/* Put back the perfcounter */
+		if (!(group->flags & ADRENO_PERFCOUNTER_GROUP_FIXED))
+			group->regs[empty].countable =
+				KGSL_PERFCOUNTER_NOT_USED;
+		return ret;
+	}
+
+	/* set initial kernel and user count */
+	if (flags & PERFCOUNTER_FLAG_KERNEL) {
+		group->regs[empty].kernelcount = 1;
+		group->regs[empty].usercount = 0;
+	} else {
+		group->regs[empty].kernelcount = 0;
+		group->regs[empty].usercount = 1;
+	}
+
+	if (offset)
+		*offset = group->regs[empty].offset;
+	if (offset_hi)
+		*offset_hi = group->regs[empty].offset_hi;
+
+	return ret;
+}
+
+
+/**
+ * adreno_perfcounter_put: Release a countable from counter resource
+ * @adreno_dev: Adreno device to configure
+ * @groupid: Desired performance counter group
+ * @countable: Countable desired to be freed from a counter
+ * @flags: Flag to determine if kernel or user space request
+ *
+ * Put a performance counter/countable pair that was previously received.  If
+ * no one else is using the countable, free up the counter for others.
+ */
+int adreno_perfcounter_put(struct adreno_device *adreno_dev,
+	unsigned int groupid, unsigned int countable, unsigned int flags)
+{
+	struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev);
+	struct adreno_perfcount_group *group;
+	unsigned int i;
+
+	if (counters == NULL || groupid >= counters->group_count)
+		return -EINVAL;
+
+	group = &(counters->groups[groupid]);
+
+	/*
+	 * Find if the counter/countable pair is used currently.
+	 * Start cycling through registers in the bank.
+	 */
+	for (i = 0; i < group->reg_count; i++) {
+		/* check if countable assigned is what we are looking for */
+		if (group->regs[i].countable == countable) {
+			/* found pair, book keep count based on request type */
+			if (flags & PERFCOUNTER_FLAG_KERNEL &&
+					group->regs[i].kernelcount > 0)
+				group->regs[i].kernelcount--;
+			else if (group->regs[i].usercount > 0)
+				group->regs[i].usercount--;
+			else
+				break;
+
+			/* mark available if not used anymore */
+			if (group->regs[i].kernelcount == 0 &&
+					group->regs[i].usercount == 0)
+				group->regs[i].countable =
+					KGSL_PERFCOUNTER_NOT_USED;
+
+			return 0;
+		}
+	}
+
+	return -EINVAL;
+}
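+
+/*
+ * Illustrative sketch (not part of the driver): a kernel-side user of the
+ * API above would typically reserve a countable, consume the returned LO/HI
+ * register offsets, and release the pair when finished.  The local variable
+ * names here are hypothetical.
+ *
+ *	unsigned int lo = 0, hi = 0;
+ *
+ *	if (!adreno_perfcounter_get(adreno_dev, groupid, countable,
+ *			&lo, &hi, PERFCOUNTER_FLAG_KERNEL)) {
+ *		... program or read the counter through lo/hi ...
+ *		adreno_perfcounter_put(adreno_dev, groupid, countable,
+ *				PERFCOUNTER_FLAG_KERNEL);
+ *	}
+ */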
+
+static void _perfcounter_enable_vbif(struct adreno_device *adreno_dev,
+		struct adreno_perfcounters *counters, unsigned int counter,
+		unsigned int countable)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg;
+
+	reg = &counters->groups[KGSL_PERFCOUNTER_GROUP_VBIF].regs[counter];
+	/* Write 1, followed by 0 to CLR register for clearing the counter */
+	kgsl_regwrite(device, reg->select - VBIF2_PERF_CLR_REG_SEL_OFF, 1);
+	kgsl_regwrite(device, reg->select - VBIF2_PERF_CLR_REG_SEL_OFF, 0);
+	kgsl_regwrite(device, reg->select, countable & VBIF2_PERF_CNT_SEL_MASK);
+	/* enable reg is 8 DWORDS before select reg */
+	kgsl_regwrite(device, reg->select - VBIF2_PERF_EN_REG_SEL_OFF, 1);
+	reg->value = 0;
+}
+
+static void _perfcounter_enable_vbif_pwr(struct adreno_device *adreno_dev,
+		struct adreno_perfcounters *counters, unsigned int counter)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg;
+
+	reg = &counters->groups[KGSL_PERFCOUNTER_GROUP_VBIF_PWR].regs[counter];
+	/* Write 1, followed by 0 to CLR register for clearing the counter */
+	kgsl_regwrite(device, reg->select + VBIF2_PERF_PWR_CLR_REG_EN_OFF, 1);
+	kgsl_regwrite(device, reg->select + VBIF2_PERF_PWR_CLR_REG_EN_OFF, 0);
+	kgsl_regwrite(device, reg->select, 1);
+	reg->value = 0;
+}
+
+static void _power_counter_enable_alwayson(struct adreno_device *adreno_dev,
+				struct adreno_perfcounters *counters)
+{
+	kgsl_regwrite(KGSL_DEVICE(adreno_dev),
+		A5XX_GPMU_ALWAYS_ON_COUNTER_RESET, 1);
+	counters->groups[KGSL_PERFCOUNTER_GROUP_ALWAYSON_PWR].regs[0].value = 0;
+}
+
+static void _power_counter_enable_gpmu(struct adreno_device *adreno_dev,
+		struct adreno_perfcounters *counters, unsigned int group,
+		unsigned int counter, unsigned int countable)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg;
+	unsigned int shift = counter << 3;
+
+	if (adreno_is_a530(adreno_dev)) {
+		if (countable > 43)
+			return;
+	} else if (adreno_is_a540(adreno_dev)) {
+		if (countable > 47)
+			return;
+	} else
+		/* return on platforms that have no GPMU */
+		return;
+
+	reg = &counters->groups[group].regs[counter];
+	kgsl_regrmw(device, reg->select, 0xff << shift, countable << shift);
+	kgsl_regwrite(device, A5XX_GPMU_POWER_COUNTER_ENABLE, 1);
+	reg->value = 0;
+}
+
+static void _power_counter_enable_default(struct adreno_device *adreno_dev,
+		struct adreno_perfcounters *counters, unsigned int group,
+		unsigned int counter, unsigned int countable)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg;
+
+	reg = &counters->groups[group].regs[counter];
+	kgsl_regwrite(device, reg->select, countable);
+	kgsl_regwrite(device, A5XX_GPMU_POWER_COUNTER_ENABLE, 1);
+	reg->value = 0;
+}
+
+static int _perfcounter_enable_default(struct adreno_device *adreno_dev,
+		struct adreno_perfcounters *counters, unsigned int group,
+		unsigned int counter, unsigned int countable)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg;
+	int i;
+	int ret = 0;
+
+	/*
+	 * Check whether the countable is valid or not by matching it against
+	 * the list of invalid countables
+	 */
+	if (gpudev->invalid_countables) {
+		struct adreno_invalid_countables invalid_countable =
+			gpudev->invalid_countables[group];
+		for (i = 0; i < invalid_countable.num_countables; i++)
+			if (countable == invalid_countable.countables[i])
+				return -EACCES;
+	}
+	reg = &(counters->groups[group].regs[counter]);
+
+	if (test_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv)) {
+		struct adreno_ringbuffer *rb = &adreno_dev->ringbuffers[0];
+		unsigned int buf[4];
+		unsigned int *cmds = buf;
+
+		cmds += cp_wait_for_idle(adreno_dev, cmds);
+		*cmds++ = cp_register(adreno_dev, reg->select, 1);
+		*cmds++ = countable;
+		/* submit to highest priority RB always */
+		ret = adreno_ringbuffer_issuecmds(rb, 0, buf, cmds-buf);
+		if (ret)
+			return ret;
+		/*
+		 * Schedule the dispatcher to make sure rb[0] is run, because
+		 * if the current RB is not rb[0] and the GPU is idle then
+		 * rb[0] will not get scheduled to run
+		 */
+		if (adreno_dev->cur_rb != rb)
+			adreno_dispatcher_schedule(device);
+		/* wait for the above commands submitted to complete */
+		ret = adreno_ringbuffer_waittimestamp(rb, rb->timestamp,
+				ADRENO_IDLE_TIMEOUT);
+		if (ret) {
+			/*
+			 * If we were woken up because of cancelling rb events
+			 * either due to soft reset or adreno_stop, ignore the
+			 * error and return 0 here. The perfcounter is already
+			 * set up in software and it will be programmed in
+			 * hardware when we wake up or come up after soft reset,
+			 * by adreno_perfcounter_restore.
+			 */
+			if (ret == -EAGAIN)
+				ret = 0;
+			else
+				KGSL_DRV_ERR(device,
+				"Perfcounter %u/%u/%u start via commands failed %d\n",
+				group, counter, countable, ret);
+		}
+	} else {
+		/* Select the desired perfcounter */
+		kgsl_regwrite(device, reg->select, countable);
+	}
+
+	if (!ret)
+		reg->value = 0;
+	return ret;
+}
+
+/**
+ * adreno_perfcounter_enable - Configure a performance counter for a countable
+ * @adreno_dev: Adreno device to configure
+ * @group: Desired performance counter group
+ * @counter: Desired performance counter in the group
+ * @countable: Desired countable
+ *
+ * Physically set up a counter within a group with the desired countable.
+ * Return 0 on success or a negative error code on failure.
+ */
+static int adreno_perfcounter_enable(struct adreno_device *adreno_dev,
+	unsigned int group, unsigned int counter, unsigned int countable)
+{
+	struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev);
+	struct adreno_gpudev *gpudev  = ADRENO_GPU_DEVICE(adreno_dev);
+
+	if (counters == NULL)
+		return -EINVAL;
+
+	if (group >= counters->group_count)
+		return -EINVAL;
+
+	if (counter >= counters->groups[group].reg_count)
+		return -EINVAL;
+
+	switch (group) {
+	case KGSL_PERFCOUNTER_GROUP_ALWAYSON:
+		/* alwayson counter is global, so init value is 0 */
+		break;
+	case KGSL_PERFCOUNTER_GROUP_PWR:
+		if (gpudev->enable_pwr_counters)
+			return gpudev->enable_pwr_counters(adreno_dev, counter);
+		return 0;
+	case KGSL_PERFCOUNTER_GROUP_VBIF:
+		if (countable > VBIF2_PERF_CNT_SEL_MASK)
+			return -EINVAL;
+		_perfcounter_enable_vbif(adreno_dev, counters, counter,
+							countable);
+		break;
+	case KGSL_PERFCOUNTER_GROUP_VBIF_PWR:
+		_perfcounter_enable_vbif_pwr(adreno_dev, counters, counter);
+		break;
+	case KGSL_PERFCOUNTER_GROUP_SP_PWR:
+	case KGSL_PERFCOUNTER_GROUP_TP_PWR:
+	case KGSL_PERFCOUNTER_GROUP_RB_PWR:
+	case KGSL_PERFCOUNTER_GROUP_CCU_PWR:
+	case KGSL_PERFCOUNTER_GROUP_UCHE_PWR:
+	case KGSL_PERFCOUNTER_GROUP_CP_PWR:
+		_power_counter_enable_default(adreno_dev, counters, group,
+						counter, countable);
+		break;
+	case KGSL_PERFCOUNTER_GROUP_GPMU_PWR:
+		_power_counter_enable_gpmu(adreno_dev, counters, group, counter,
+				countable);
+		break;
+	case KGSL_PERFCOUNTER_GROUP_ALWAYSON_PWR:
+		_power_counter_enable_alwayson(adreno_dev, counters);
+		break;
+	case KGSL_PERFCOUNTER_GROUP_RBBM:
+		/* The A5XX_RBBM_ALWAYS_COUNT countable is not reliable on a540 */
+		if (adreno_is_a540(adreno_dev))
+			if (countable == A5XX_RBBM_ALWAYS_COUNT)
+				return -EINVAL;
+		/* fall through to the default handling */
+	default:
+		return _perfcounter_enable_default(adreno_dev, counters, group,
+				counter, countable);
+	}
+
+	return 0;
+}
+
+static uint64_t _perfcounter_read_alwayson(struct adreno_device *adreno_dev,
+		struct adreno_perfcount_group *group, unsigned int counter)
+{
+	uint64_t val = 0;
+
+	adreno_readreg64(adreno_dev, ADRENO_REG_RBBM_ALWAYSON_COUNTER_LO,
+				   ADRENO_REG_RBBM_ALWAYSON_COUNTER_HI, &val);
+
+	return val + group->regs[counter].value;
+}
+
+static uint64_t _perfcounter_read_pwr(struct adreno_device *adreno_dev,
+		struct adreno_perfcount_group *group, unsigned int counter)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg;
+	unsigned int in = 0, out, lo = 0, hi = 0;
+	unsigned int enable_bit;
+
+	reg = &group->regs[counter];
+
+	/* Remember, counter 0 is not emulated on 5XX */
+	if (adreno_is_a5xx(adreno_dev) && (counter == 0))
+		return -EINVAL;
+
+	if (adreno_is_a3xx(adreno_dev)) {
+		/* On A3XX we need to freeze the counter so we can read it */
+		if (counter == 0)
+			enable_bit = 0x00010000;
+		else
+			enable_bit = 0x00020000;
+
+		/* freeze counter */
+		adreno_readreg(adreno_dev, ADRENO_REG_RBBM_RBBM_CTL, &in);
+		out = (in & ~enable_bit);
+		adreno_writereg(adreno_dev, ADRENO_REG_RBBM_RBBM_CTL, out);
+	}
+
+	kgsl_regread(device, reg->offset, &lo);
+	kgsl_regread(device, reg->offset_hi, &hi);
+
+	/* restore the counter control value */
+	if (adreno_is_a3xx(adreno_dev))
+		adreno_writereg(adreno_dev, ADRENO_REG_RBBM_RBBM_CTL, in);
+
+	return REG_64BIT_VAL(hi, lo, reg->value);
+}
+
+static uint64_t _perfcounter_read_vbif(struct adreno_device *adreno_dev,
+		struct adreno_perfcount_group *group, unsigned int counter)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg;
+	unsigned int lo = 0, hi = 0;
+
+	reg = &group->regs[counter];
+
+	/* freeze counter */
+	if (adreno_is_a3xx(adreno_dev))
+		kgsl_regwrite(device, reg->select - VBIF2_PERF_EN_REG_SEL_OFF,
+							0);
+
+	kgsl_regread(device, reg->offset, &lo);
+	kgsl_regread(device, reg->offset_hi, &hi);
+
+	/* un-freeze counter */
+	if (adreno_is_a3xx(adreno_dev))
+		kgsl_regwrite(device, reg->select - VBIF2_PERF_EN_REG_SEL_OFF,
+							1);
+
+	return REG_64BIT_VAL(hi, lo, reg->value);
+}
+
+static uint64_t _perfcounter_read_vbif_pwr(struct adreno_device *adreno_dev,
+		struct adreno_perfcount_group *group, unsigned int counter)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg;
+	unsigned int lo = 0, hi = 0;
+
+	reg = &group->regs[counter];
+
+	/* freeze counter */
+	if (adreno_is_a3xx(adreno_dev))
+		kgsl_regwrite(device, reg->select, 0);
+
+	kgsl_regread(device, reg->offset, &lo);
+	kgsl_regread(device, reg->offset_hi, &hi);
+
+	/* un-freeze counter */
+	if (adreno_is_a3xx(adreno_dev))
+		kgsl_regwrite(device, reg->select, 1);
+
+	return REG_64BIT_VAL(hi, lo, reg->value);
+}
+
+static uint64_t _perfcounter_read_pwrcntr(struct adreno_device *adreno_dev,
+	struct adreno_perfcount_group *group, unsigned int counter)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg;
+	unsigned int lo = 0, hi = 0;
+
+	reg = &group->regs[counter];
+
+	kgsl_regread(device, reg->offset, &lo);
+	kgsl_regread(device, reg->offset_hi, &hi);
+
+	return REG_64BIT_VAL(hi, lo, reg->value);
+}
+
+static uint64_t _perfcounter_read_default(struct adreno_device *adreno_dev,
+		struct adreno_perfcount_group *group, unsigned int counter)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_perfcount_register *reg;
+	unsigned int lo = 0, hi = 0;
+	unsigned int in = 0, out;
+
+	reg = &group->regs[counter];
+
+	/* Freeze the counter */
+	if (adreno_is_a3xx(adreno_dev)) {
+		adreno_readreg(adreno_dev, ADRENO_REG_RBBM_PERFCTR_CTL, &in);
+		out = in & ~RBBM_PERFCTR_CTL_ENABLE;
+		adreno_writereg(adreno_dev, ADRENO_REG_RBBM_PERFCTR_CTL, out);
+	}
+
+	/* Read the values */
+	kgsl_regread(device, reg->offset, &lo);
+	kgsl_regread(device, reg->offset_hi, &hi);
+
+	/* Re-Enable the counter */
+	if (adreno_is_a3xx(adreno_dev))
+		adreno_writereg(adreno_dev, ADRENO_REG_RBBM_PERFCTR_CTL, in);
+
+	return REG_64BIT_VAL(hi, lo, 0);
+}
+
+/**
+ * adreno_perfcounter_read() - Reads a performance counter
+ * @adreno_dev: The device on which the counter is running
+ * @groupid: The group of the counter
+ * @counter: The counter within the group
+ *
+ * Read a performance counter on an Adreno device.
+ * Returns the 64 bit counter value on success, else 0.
+ */
+uint64_t adreno_perfcounter_read(struct adreno_device *adreno_dev,
+	unsigned int groupid, unsigned int counter)
+{
+	struct adreno_perfcounters *counters = ADRENO_PERFCOUNTERS(adreno_dev);
+	struct adreno_perfcount_group *group;
+
+	/* Validate everything here so the subfunctions don't need to check */
+	if (counters == NULL)
+		return 0;
+
+	if (groupid >= counters->group_count)
+		return 0;
+
+	group = &counters->groups[groupid];
+
+	if (counter >= group->reg_count)
+		return 0;
+
+	switch (groupid) {
+	case KGSL_PERFCOUNTER_GROUP_ALWAYSON:
+		return _perfcounter_read_alwayson(adreno_dev, group, counter);
+	case KGSL_PERFCOUNTER_GROUP_VBIF_PWR:
+		return _perfcounter_read_vbif_pwr(adreno_dev, group, counter);
+	case KGSL_PERFCOUNTER_GROUP_VBIF:
+		return _perfcounter_read_vbif(adreno_dev, group, counter);
+	case KGSL_PERFCOUNTER_GROUP_PWR:
+		return _perfcounter_read_pwr(adreno_dev, group, counter);
+	case KGSL_PERFCOUNTER_GROUP_SP_PWR:
+	case KGSL_PERFCOUNTER_GROUP_TP_PWR:
+	case KGSL_PERFCOUNTER_GROUP_RB_PWR:
+	case KGSL_PERFCOUNTER_GROUP_CCU_PWR:
+	case KGSL_PERFCOUNTER_GROUP_UCHE_PWR:
+	case KGSL_PERFCOUNTER_GROUP_CP_PWR:
+	case KGSL_PERFCOUNTER_GROUP_GPMU_PWR:
+	case KGSL_PERFCOUNTER_GROUP_ALWAYSON_PWR:
+		return _perfcounter_read_pwrcntr(adreno_dev, group, counter);
+	default:
+		return _perfcounter_read_default(adreno_dev, group, counter);
+	}
+}
diff --git a/drivers/gpu/msm/adreno_perfcounter.h b/drivers/gpu/msm/adreno_perfcounter.h
new file mode 100644
index 0000000..8c4db38
--- /dev/null
+++ b/drivers/gpu/msm/adreno_perfcounter.h
@@ -0,0 +1,141 @@
+/* Copyright (c) 2008-2015, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef __ADRENO_PERFCOUNTER_H
+#define __ADRENO_PERFCOUNTER_H
+
+#include "adreno.h"
+
+struct adreno_device;
+
+/* ADRENO_PERFCOUNTERS - Given an adreno device, return the perfcounters list */
+#define ADRENO_PERFCOUNTERS(_a) \
+	(ADRENO_GPU_DEVICE(_a) ? ADRENO_GPU_DEVICE(_a)->perfcounters : NULL)
+
+#define PERFCOUNTER_FLAG_NONE 0x0
+#define PERFCOUNTER_FLAG_KERNEL 0x1
+
+/* Structs to maintain the list of active performance counters */
+
+/**
+ * struct adreno_perfcount_register: register state
+ * @countable: countable the register holds
+ * @kernelcount: number of kernel users of the register
+ * @usercount: number of user space users of the register
+ * @offset: register hardware offset for the low 32 bits of the counter
+ * @offset_hi: register hardware offset for the high 32 bits of the counter
+ * @load_bit: The bit number in LOAD register which corresponds to this counter
+ * @select: The countable register offset
+ * @value: The 64 bit countable register value
+ */
+struct adreno_perfcount_register {
+	unsigned int countable;
+	unsigned int kernelcount;
+	unsigned int usercount;
+	unsigned int offset;
+	unsigned int offset_hi;
+	int load_bit;
+	unsigned int select;
+	uint64_t value;
+};
+
+/**
+ * struct adreno_perfcount_group: registers for a hardware group
+ * @regs: available registers for this group
+ * @reg_count: total registers for this group
+ * @name: group name for this group
+ * @flags: group capability flags (e.g. ADRENO_PERFCOUNTER_GROUP_FIXED)
+ */
+struct adreno_perfcount_group {
+	struct adreno_perfcount_register *regs;
+	unsigned int reg_count;
+	const char *name;
+	unsigned long flags;
+};
+
+/*
+ * ADRENO_PERFCOUNTER_GROUP_FIXED indicates that a perfcounter group is fixed -
+ * instead of having configurable countables like the other groups, registers in
+ * fixed groups have a hardwired countable.  So when the user requests a
+ * countable in one of these groups, that countable should be used as the
+ * register offset to return
+ */
+
+#define ADRENO_PERFCOUNTER_GROUP_FIXED BIT(0)
+
+/**
+ * struct adreno_perfcounters: all available perfcounter groups
+ * @groups: available groups for this device
+ * @group_count: total groups for this device
+ */
+struct adreno_perfcounters {
+	struct adreno_perfcount_group *groups;
+	unsigned int group_count;
+};
+
+/**
+ * struct adreno_invalid_countables: Invalid countables that do not work properly
+ * @countables: List of unusable countables
+ * @num_countables: Number of unusable countables
+ */
+struct adreno_invalid_countables {
+	const unsigned int *countables;
+	int num_countables;
+};
+
+#define ADRENO_PERFCOUNTER_GROUP_FLAGS(core, offset, name, flags) \
+	[KGSL_PERFCOUNTER_GROUP_##offset] = { core##_perfcounters_##name, \
+	ARRAY_SIZE(core##_perfcounters_##name), __stringify(name), flags }
+
+#define ADRENO_PERFCOUNTER_GROUP(core, offset, name) \
+	ADRENO_PERFCOUNTER_GROUP_FLAGS(core, offset, name, 0)
+
+#define ADRENO_POWER_COUNTER_GROUP(core, offset, name) \
+	[KGSL_PERFCOUNTER_GROUP_##offset##_PWR] = { core##_pwrcounters_##name, \
+	ARRAY_SIZE(core##_pwrcounters_##name), __stringify(name##_pwr), 0}
+
+#define ADRENO_PERFCOUNTER_INVALID_COUNTABLE(name, off) \
+	[KGSL_PERFCOUNTER_GROUP_##off] = { name##_invalid_countables, \
+				ARRAY_SIZE(name##_invalid_countables) }
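+
+/*
+ * For illustration (assuming a core "a5xx" that defines an
+ * a5xx_perfcounters_sp register array), the group macro above expands as:
+ *
+ *	ADRENO_PERFCOUNTER_GROUP(a5xx, SP, sp)
+ *
+ * becomes
+ *
+ *	[KGSL_PERFCOUNTER_GROUP_SP] = { a5xx_perfcounters_sp,
+ *		ARRAY_SIZE(a5xx_perfcounters_sp), "sp", 0 }
+ */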
+
+int adreno_perfcounter_query_group(struct adreno_device *adreno_dev,
+	unsigned int groupid, unsigned int __user *countables,
+	unsigned int count, unsigned int *max_counters);
+
+int adreno_perfcounter_read_group(struct adreno_device *adreno_dev,
+	struct kgsl_perfcounter_read_group __user *reads, unsigned int count);
+
+void adreno_perfcounter_close(struct adreno_device *adreno_dev);
+
+void adreno_perfcounter_restore(struct adreno_device *adreno_dev);
+
+void adreno_perfcounter_save(struct adreno_device *adreno_dev);
+
+void adreno_perfcounter_start(struct adreno_device *adreno_dev);
+
+void adreno_perfcounter_init(struct adreno_device *adreno_dev);
+
+int adreno_perfcounter_get_groupid(struct adreno_device *adreno_dev,
+					const char *name);
+
+uint64_t adreno_perfcounter_read(struct adreno_device *adreno_dev,
+	unsigned int group, unsigned int counter);
+
+const char *adreno_perfcounter_get_name(struct adreno_device
+					*adreno_dev, unsigned int groupid);
+
+int adreno_perfcounter_get(struct adreno_device *adreno_dev,
+	unsigned int groupid, unsigned int countable, unsigned int *offset,
+	unsigned int *offset_hi, unsigned int flags);
+
+int adreno_perfcounter_put(struct adreno_device *adreno_dev,
+	unsigned int groupid, unsigned int countable, unsigned int flags);
+
+#endif /* __ADRENO_PERFCOUNTER_H */
diff --git a/drivers/gpu/msm/adreno_pm4types.h b/drivers/gpu/msm/adreno_pm4types.h
new file mode 100644
index 0000000..fceceda
--- /dev/null
+++ b/drivers/gpu/msm/adreno_pm4types.h
@@ -0,0 +1,389 @@
+/* Copyright (c) 2002,2007-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef __ADRENO_PM4TYPES_H
+#define __ADRENO_PM4TYPES_H
+
+#include "adreno.h"
+
+#define CP_TYPE0_PKT	(0 << 30)
+#define CP_TYPE3_PKT	(3 << 30)
+#define CP_TYPE4_PKT	(4 << 28)
+#define CP_TYPE7_PKT	(7 << 28)
+
+#define PM4_TYPE4_PKT_SIZE_MAX  128
+
+/* type3 packets */
+
+/* Enable preemption flag */
+#define CP_PREEMPT_ENABLE 0x1C
+/* Preemption token command on which preemption occurs */
+#define CP_PREEMPT_TOKEN 0x1E
+/* Bit to set in CP_PREEMPT_TOKEN ordinal for interrupt on preemption */
+#define CP_PREEMPT_ORDINAL_INTERRUPT 24
+
+/* Wait for memory writes to complete */
+#define CP_WAIT_MEM_WRITES     0x12
+
+/* initialize CP's micro-engine */
+#define CP_ME_INIT		0x48
+
+/* skip N 32-bit words to get to the next packet */
+#define CP_NOP			0x10
+
+/* indirect buffer dispatch.  same as IB, but init is pipelined */
+#define CP_INDIRECT_BUFFER_PFD	0x37
+
+/* wait for the IDLE state of the engine */
+#define CP_WAIT_FOR_IDLE	0x26
+
+/* wait until a register or memory location is a specific value */
+#define CP_WAIT_REG_MEM	0x3c
+
+/* wait until a register location is equal to a specific value */
+#define CP_WAIT_REG_EQ		0x52
+
+/* switches SMMU pagetable, used on a5xx only */
+#define CP_SMMU_TABLE_UPDATE 0x53
+
+/* register read/modify/write */
+#define CP_REG_RMW		0x21
+
+/* Set binning configuration registers */
+#define CP_SET_BIN_DATA             0x2f
+
+/* reads register in chip and writes to memory */
+#define CP_REG_TO_MEM		0x3e
+
+/* write N 32-bit words to memory */
+#define CP_MEM_WRITE		0x3d
+
+/* conditional execution of a sequence of packets */
+#define CP_COND_EXEC		0x44
+
+/* conditional write to memory or register */
+#define CP_COND_WRITE		0x45
+
+/* generate an event that creates a write to memory when completed */
+#define CP_EVENT_WRITE		0x46
+
+/* initiate fetch of index buffer and draw */
+#define CP_DRAW_INDX		0x22
+
+/* New draw packets defined for A4XX */
+#define CP_DRAW_INDX_OFFSET	0x38
+#define CP_DRAW_INDIRECT	0x28
+#define CP_DRAW_INDX_INDIRECT	0x29
+#define CP_DRAW_AUTO		0x24
+
+/* load constant into chip and to memory */
+#define CP_SET_CONSTANT	0x2d
+
+/* selective invalidation of state pointers */
+#define CP_INVALIDATE_STATE	0x3b
+
+/* generate interrupt from the command stream */
+#define CP_INTERRUPT		0x40
+
+/* A5XX Enable yield in RB only */
+#define CP_YIELD_ENABLE 0x1C
+
+/* Enable/Disable/Defer A5x global preemption model */
+#define CP_PREEMPT_ENABLE_GLOBAL    0x69
+
+/* Enable/Disable A5x local preemption model */
+#define CP_PREEMPT_ENABLE_LOCAL     0x6A
+
+/* Yield token on a5xx similar to CP_PREEMPT on a4xx */
+#define CP_CONTEXT_SWITCH_YIELD     0x6B
+
+/* Inform CP about current render mode (needed for a5xx preemption) */
+#define CP_SET_RENDER_MODE          0x6C
+
+/* Write register, ignoring context state for context sensitive registers */
+#define CP_REG_WR_NO_CTXT  0x78
+
+/*
+ * for A4xx
+ * Write to register with address that does not fit into type-0 pkt
+ */
+#define CP_WIDE_REG_WRITE           0x74
+
+
+/* PFP waits until the FIFO between the PFP and the ME is empty */
+#define CP_WAIT_FOR_ME		0x13
+
+#define CP_SET_PROTECTED_MODE  0x5f /* sets the register protection mode */
+
+/* Used to switch GPU between secure and non-secure modes */
+#define CP_SET_SECURE_MODE 0x66
+
+#define CP_BOOTSTRAP_UCODE  0x6f /* bootstraps microcode */
+
+/*
+ * for a3xx
+ */
+
+#define CP_LOAD_STATE 0x30 /* load high level sequencer command */
+
+/* Conditionally load an IB based on a flag */
+#define CP_COND_INDIRECT_BUFFER_PFE 0x3A /* prefetch enabled */
+#define CP_COND_INDIRECT_BUFFER_PFD 0x32 /* prefetch disabled */
+
+/* Load a buffer with pre-fetch enabled */
+#define CP_INDIRECT_BUFFER_PFE 0x3F
+
+#define CP_EXEC_CL 0x31
+
+/* (A4x) save PM4 stream pointers to execute upon a visible draw */
+#define CP_SET_DRAW_STATE 0x43
+
+#define CP_LOADSTATE_DSTOFFSET_SHIFT 0x00000000
+#define CP_LOADSTATE_STATESRC_SHIFT 0x00000010
+#define CP_LOADSTATE_STATEBLOCKID_SHIFT 0x00000013
+#define CP_LOADSTATE_NUMOFUNITS_SHIFT 0x00000016
+#define CP_LOADSTATE_STATETYPE_SHIFT 0x00000000
+#define CP_LOADSTATE_EXTSRCADDR_SHIFT 0x00000002
+
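+/*
+ * 0x9669 is a 16-entry lookup table of the odd-parity bit (the complement of
+ * the XOR of all bits) for a 4-bit value; the expression below first folds
+ * the 32-bit input down to 4 bits by XORing its nibbles together.
+ */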
+static inline uint pm4_calc_odd_parity_bit(uint val)
+{
+	return (0x9669 >> (0xf & ((val) ^
+	((val) >> 4) ^ ((val) >> 8) ^ ((val) >> 12) ^
+	((val) >> 16) ^ ((val) >> 20) ^ ((val) >> 24) ^
+	((val) >> 28)))) & 1;
+}
+
+/*
+ * PM4 packet header functions
+ * For all the packet functions the passed in count should be the size of the
+ * payload excluding the header
+ */
+static inline uint cp_type0_packet(uint regindx, uint cnt)
+{
+	return CP_TYPE0_PKT | ((cnt-1) << 16) | ((regindx) & 0x7FFF);
+}
+
+static inline uint cp_type3_packet(uint opcode, uint cnt)
+{
+	return CP_TYPE3_PKT | ((cnt-1) << 16) | (((opcode) & 0xFF) << 8);
+}
+
+static inline uint cp_type4_packet(uint opcode, uint cnt)
+{
+	return CP_TYPE4_PKT | ((cnt) << 0) |
+	(pm4_calc_odd_parity_bit(cnt) << 7) |
+	(((opcode) & 0x3FFFF) << 8) |
+	((pm4_calc_odd_parity_bit(opcode) << 27));
+}
+
+static inline uint cp_type7_packet(uint opcode, uint cnt)
+{
+	return CP_TYPE7_PKT | ((cnt) << 0) |
+	(pm4_calc_odd_parity_bit(cnt) << 15) |
+	(((opcode) & 0x7F) << 16) |
+	((pm4_calc_odd_parity_bit(opcode) << 23));
+
+}
+
+#define pkt_is_type0(pkt) (((pkt) & 0XC0000000) == CP_TYPE0_PKT)
+
+#define type0_pkt_size(pkt) ((((pkt) >> 16) & 0x3FFF) + 1)
+#define type0_pkt_offset(pkt) ((pkt) & 0x7FFF)
+
+/*
+ * Check both for the type3 opcode and make sure that the reserved bits [1:7]
+ * and 15 are 0
+ */
+
+#define pkt_is_type3(pkt) \
+	((((pkt) & 0xC0000000) == CP_TYPE3_PKT) && \
+	 (((pkt) & 0x80FE) == 0))
+
+#define cp_type3_opcode(pkt) (((pkt) >> 8) & 0xFF)
+#define type3_pkt_size(pkt) ((((pkt) >> 16) & 0x3FFF) + 1)
+
+#define pkt_is_type4(pkt) \
+	((((pkt) & 0xF0000000) == CP_TYPE4_PKT) && \
+	 ((((pkt) >> 27) & 0x1) == \
+	 pm4_calc_odd_parity_bit(cp_type4_base_index_one_reg_wr(pkt))) \
+	 && ((((pkt) >> 7) & 0x1) == \
+	 pm4_calc_odd_parity_bit(type4_pkt_size(pkt))))
+
+#define cp_type4_base_index_one_reg_wr(pkt) (((pkt) >> 8) & 0x7FFFF)
+#define type4_pkt_size(pkt) ((pkt) & 0x7F)
+
+#define pkt_is_type7(pkt) \
+	((((pkt) & 0xF0000000) == CP_TYPE7_PKT) && \
+	 (((pkt) & 0x0F000000) == 0) && \
+	 ((((pkt) >> 23) & 0x1) == \
+	 pm4_calc_odd_parity_bit(cp_type7_opcode(pkt))) \
+	 && ((((pkt) >> 15) & 0x1) == \
+	 pm4_calc_odd_parity_bit(type7_pkt_size(pkt))))
+
+#define cp_type7_opcode(pkt) (((pkt) >> 16) & 0x7F)
+#define type7_pkt_size(pkt) ((pkt) & 0x3FFF)
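+
+/*
+ * Illustrative sketch: a CP_ME_INIT header for an 8 dword payload on a
+ * type7 (non-legacy) target is built and decoded with the helpers above:
+ *
+ *	unsigned int hdr = cp_type7_packet(CP_ME_INIT, 8);
+ *
+ *	pkt_is_type7(hdr)	evaluates to true
+ *	cp_type7_opcode(hdr)	gives 0x48 (CP_ME_INIT)
+ *	type7_pkt_size(hdr)	gives 8
+ */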
+
+/* dword base address of the GFX decode space */
+#define SUBBLOCK_OFFSET(reg) ((unsigned int)((reg) - (0x2000)))
+
+/* gmem command buffer length */
+#define CP_REG(reg) ((0x4 << 16) | (SUBBLOCK_OFFSET(reg)))
+
+/* Return true if the hardware uses the legacy (A4XX and older) PM4 format */
+#define ADRENO_LEGACY_PM4(_d) (ADRENO_GPUREV(_d) < 500)
+
+/**
+ * cp_packet - Generic CP packet to support different opcodes on
+ * different GPU cores.
+ * @adreno_dev: The adreno device
+ * @opcode: Operation for cp packet
+ * @size: size for cp packet
+ */
+static inline uint cp_packet(struct adreno_device *adreno_dev,
+				int opcode, uint size)
+{
+	if (ADRENO_LEGACY_PM4(adreno_dev))
+		return cp_type3_packet(opcode, size);
+
+	return cp_type7_packet(opcode, size);
+}
+
+/**
+ * cp_mem_packet - Generic CP memory packet to support different
+ * opcodes on different GPU cores.
+ * @adreno_dev: The adreno device
+ * @opcode: mem operation for cp packet
+ * @size: size for cp packet
+ * @num_mem: num of mem access
+ */
+static inline uint cp_mem_packet(struct adreno_device *adreno_dev,
+				int opcode, uint size, uint num_mem)
+{
+	if (ADRENO_LEGACY_PM4(adreno_dev))
+		return cp_type3_packet(opcode, size);
+
+	return cp_type7_packet(opcode, size + num_mem);
+}
+
+/* Return 1 if the command is an indirect buffer of any kind */
+static inline int adreno_cmd_is_ib(struct adreno_device *adreno_dev,
+					unsigned int cmd)
+{
+	return cmd == cp_mem_packet(adreno_dev,
+			CP_INDIRECT_BUFFER_PFE, 2, 1) ||
+		cmd == cp_mem_packet(adreno_dev,
+			CP_INDIRECT_BUFFER_PFD, 2, 1) ||
+		cmd == cp_mem_packet(adreno_dev,
+			CP_COND_INDIRECT_BUFFER_PFE, 2, 1) ||
+		cmd == cp_mem_packet(adreno_dev,
+			CP_COND_INDIRECT_BUFFER_PFD, 2, 1);
+}
+
+/**
+ * cp_gpuaddr - Generic function to add 64bit and 32bit gpuaddr
+ * to pm4 commands
+ * @adreno_dev: The adreno device
+ * @cmds: command pointer to add gpuaddr
+ * @gpuaddr: gpuaddr to add
+ */
+static inline uint cp_gpuaddr(struct adreno_device *adreno_dev,
+		   uint *cmds, uint64_t gpuaddr)
+{
+	uint *start = cmds;
+
+	if (ADRENO_LEGACY_PM4(adreno_dev))
+		*cmds++ = (uint)gpuaddr;
+	else {
+		*cmds++ = lower_32_bits(gpuaddr);
+		*cmds++ = upper_32_bits(gpuaddr);
+	}
+	return cmds - start;
+}
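+
+/*
+ * Illustrative sketch: cp_mem_packet() and cp_gpuaddr() combine to emit a
+ * core-independent CP_MEM_WRITE of a single dword (the same pattern is used
+ * by the profiling code in adreno_profile.c):
+ *
+ *	*cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 1);
+ *	cmds += cp_gpuaddr(adreno_dev, cmds, gpuaddr);
+ *	*cmds++ = value;
+ */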
+
+/**
+ * cp_register - Generic function for gpu register operation
+ * @adreno_dev: The adreno device
+ * @reg: GPU register
+ * @size: count for PM4 operation
+ */
+static inline uint cp_register(struct adreno_device *adreno_dev,
+			unsigned int reg, unsigned int size)
+{
+	if (ADRENO_LEGACY_PM4(adreno_dev))
+		return cp_type0_packet(reg, size);
+
+	return cp_type4_packet(reg, size);
+}
+
+/**
+ * cp_wait_for_me - common function for WAIT_FOR_ME
+ * @adreno_dev: The adreno device
+ * @cmds: command buffer pointer to append the packet to
+ */
+static inline uint cp_wait_for_me(struct adreno_device *adreno_dev,
+				uint *cmds)
+{
+	uint *start = cmds;
+
+	if (ADRENO_LEGACY_PM4(adreno_dev)) {
+		*cmds++ = cp_type3_packet(CP_WAIT_FOR_ME, 1);
+		*cmds++ = 0;
+	} else
+		*cmds++ = cp_type7_packet(CP_WAIT_FOR_ME, 0);
+
+	return cmds - start;
+}
+
+/**
+ * cp_wait_for_idle - common function for WAIT_FOR_IDLE
+ * @adreno_dev: The adreno device
+ * @cmds: command buffer pointer to append the packet to
+ */
+static inline uint cp_wait_for_idle(struct adreno_device *adreno_dev,
+				uint *cmds)
+{
+	uint *start = cmds;
+
+	if (ADRENO_LEGACY_PM4(adreno_dev)) {
+		*cmds++ = cp_type3_packet(CP_WAIT_FOR_IDLE, 1);
+		*cmds++ = 0;
+	} else
+		*cmds++ = cp_type7_packet(CP_WAIT_FOR_IDLE, 0);
+
+	return cmds - start;
+}
+
+/**
+ * cp_invalidate_state - common function for invalidating cp
+ * state
+ * @adreno_dev: The adreno device
+ * @cmds: command buffer pointer to append the packet to
+ */
+static inline uint cp_invalidate_state(struct adreno_device *adreno_dev,
+				uint *cmds)
+{
+	uint *start = cmds;
+
+	if (ADRENO_GPUREV(adreno_dev) < 500) {
+		*cmds++ = cp_type3_packet(CP_INVALIDATE_STATE, 1);
+		*cmds++ = 0x7fff;
+	} else {
+		*cmds++ = cp_type7_packet(CP_SET_DRAW_STATE, 3);
+		*cmds++ = 0x40000;
+		*cmds++ = 0;
+		*cmds++ = 0;
+	}
+
+	return cmds - start;
+}
+
+#endif	/* __ADRENO_PM4TYPES_H */
diff --git a/drivers/gpu/msm/adreno_profile.c b/drivers/gpu/msm/adreno_profile.c
new file mode 100644
index 0000000..e34957e
--- /dev/null
+++ b/drivers/gpu/msm/adreno_profile.c
@@ -0,0 +1,1231 @@
+/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/fs.h>
+#include <linux/kernel.h>
+#include <linux/ctype.h>
+#include <linux/slab.h>
+#include <linux/delay.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+#include <linux/debugfs.h>
+
+#include "adreno.h"
+#include "adreno_profile.h"
+#include "kgsl_sharedmem.h"
+#include "adreno_pm4types.h"
+
+#define ASSIGNS_STR_FORMAT "%.8s:%u "
+
+/*
+ * Raw Data for processing later:
+ *        : 6 - timestamp, count, context id, pid, tid, client type
+ * [per counter] - data for each counter
+ *        : 1 - Register offset
+ *        : 2 - Pre IB register hi/lo value
+ *        : 2 - Post IB register hi/lo value
+ * [per counter end]
+ */
+#define SIZE_DATA(cnt) (6 + (cnt) * 5)
+
+/*
+ * Pre-IB command size (in dwords):
+ *        : 2 - NOP start identifier
+ *        : 4 - timestamp
+ *        : 4 - count
+ *        : 4 - context id
+ *        : 4 - pid
+ *        : 4 - tid
+ *        : 4 - type
+ * [loop count start] - for each counter to watch
+ *        : 4 - Register offset
+ *        : 4 - Register read lo
+ *        : 4 - Register read high
+ * [loop end]
+ *        : 2 - NOP end identifier
+ */
+#define SIZE_PREIB(cnt) (28 + (cnt) * 12)
+
+/*
+ * Post-IB command size (in dwords):
+ *        : 2 - NOP start identifier
+ * [loop count start] - for each counter to watch
+ *        : 4 - Register read lo
+ *        : 4 - Register read high
+ * [loop end]
+ *        : 2 - NOP end identifier
+ */
+#define SIZE_POSTIB(cnt) (4 + (cnt) * 8)
+
+/* Counter data + Pre size + post size = total size */
+#define SIZE_SHARED_ENTRY(cnt) (SIZE_DATA(cnt) + SIZE_PREIB(cnt) \
+		+ SIZE_POSTIB(cnt))
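+
+/*
+ * For example, an entry for 4 assigned counters occupies
+ * SIZE_DATA(4) + SIZE_PREIB(4) + SIZE_POSTIB(4) = 26 + 76 + 36 = 138 dwords
+ * of the shared buffer.
+ */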
+
+/*
+ * Space for the following string: "%u %u %u %.5s %u "
+ * [count iterations]: "%.8s:%u %llu %llu%c"
+ */
+#define SIZE_PIPE_ENTRY(cnt) (50 + (cnt) * 62)
+#define SIZE_LOG_ENTRY(cnt) (6 + (cnt) * 5)
+
+static struct adreno_context_type ctxt_type_table[] = {KGSL_CONTEXT_TYPES};
+
+static const char *get_api_type_str(unsigned int type)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(ctxt_type_table) - 1; i++) {
+		if (ctxt_type_table[i].type == type)
+			return ctxt_type_table[i].str;
+	}
+	return "UNKNOWN";
+}
+
+static inline uint _ib_start(struct adreno_device *adreno_dev,
+			 unsigned int *cmds)
+{
+	unsigned int *start = cmds;
+
+	*cmds++ = cp_packet(adreno_dev, CP_NOP, 1);
+	*cmds++ = KGSL_START_OF_PROFILE_IDENTIFIER;
+
+	return cmds - start;
+}
+
+static inline uint _ib_end(struct adreno_device *adreno_dev,
+			  unsigned int *cmds)
+{
+	unsigned int *start = cmds;
+
+	*cmds++ = cp_packet(adreno_dev, CP_NOP, 1);
+	*cmds++ = KGSL_END_OF_PROFILE_IDENTIFIER;
+
+	return cmds - start;
+}
+
+static inline uint _ib_cmd_mem_write(struct adreno_device *adreno_dev,
+			uint *cmds, uint64_t gpuaddr, uint val, uint *off)
+{
+	unsigned int *start = cmds;
+
+	*cmds++ = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 1);
+	cmds += cp_gpuaddr(adreno_dev, cmds, gpuaddr);
+	*cmds++ = val;
+
+	*off += sizeof(unsigned int);
+	return cmds - start;
+}
+
+static inline uint _ib_cmd_reg_to_mem(struct adreno_device *adreno_dev,
+			uint *cmds, uint64_t gpuaddr, uint val, uint *off)
+{
+	unsigned int *start = cmds;
+
+	*cmds++ = cp_mem_packet(adreno_dev, CP_REG_TO_MEM, 2, 1);
+	*cmds++ = val;
+	cmds += cp_gpuaddr(adreno_dev, cmds, gpuaddr);
+
+	*off += sizeof(unsigned int);
+	return cmds - start;
+}
+
+static inline int _create_ib_ref(struct adreno_device *adreno_dev,
+		struct kgsl_memdesc *memdesc, unsigned int *cmd,
+		unsigned int cnt, unsigned int off)
+{
+	unsigned int *start = cmd;
+
+	*cmd++ = cp_mem_packet(adreno_dev, CP_INDIRECT_BUFFER_PFE, 2, 1);
+	cmd += cp_gpuaddr(adreno_dev, cmd, (memdesc->gpuaddr + off));
+	*cmd++ = cnt;
+
+	return cmd - start;
+}
+
+static int _build_pre_ib_cmds(struct adreno_device *adreno_dev,
+		struct adreno_profile *profile,
+		unsigned int *rbcmds, unsigned int head,
+		unsigned int timestamp, struct adreno_context *drawctxt)
+{
+	struct adreno_profile_assigns_list *entry;
+	unsigned int *start, *ibcmds;
+	unsigned int count = profile->assignment_count;
+	uint64_t gpuaddr = profile->shared_buffer.gpuaddr;
+	unsigned int ib_offset = head + SIZE_DATA(count);
+	unsigned int data_offset = head * sizeof(unsigned int);
+
+	ibcmds = ib_offset + ((unsigned int *) profile->shared_buffer.hostptr);
+	start = ibcmds;
+
+	/* start of profile identifier */
+	ibcmds += _ib_start(adreno_dev, ibcmds);
+
+	/*
+	 * Write ringbuffer commands to save the following to memory:
+	 * timestamp, count, context_id, pid, tid, context type
+	 */
+	ibcmds += _ib_cmd_mem_write(adreno_dev, ibcmds, gpuaddr + data_offset,
+			timestamp, &data_offset);
+	ibcmds += _ib_cmd_mem_write(adreno_dev, ibcmds, gpuaddr + data_offset,
+			profile->assignment_count, &data_offset);
+	ibcmds += _ib_cmd_mem_write(adreno_dev, ibcmds, gpuaddr + data_offset,
+			drawctxt->base.id, &data_offset);
+	ibcmds += _ib_cmd_mem_write(adreno_dev, ibcmds, gpuaddr + data_offset,
+			drawctxt->base.proc_priv->pid, &data_offset);
+	ibcmds += _ib_cmd_mem_write(adreno_dev, ibcmds, gpuaddr + data_offset,
+			drawctxt->base.tid, &data_offset);
+	ibcmds += _ib_cmd_mem_write(adreno_dev, ibcmds, gpuaddr + data_offset,
+			drawctxt->type, &data_offset);
+
+	/* loop for each countable assigned */
+	list_for_each_entry(entry, &profile->assignments_list, list) {
+		ibcmds += _ib_cmd_mem_write(adreno_dev, ibcmds,
+				gpuaddr + data_offset, entry->offset,
+				&data_offset);
+		ibcmds += _ib_cmd_reg_to_mem(adreno_dev, ibcmds,
+				gpuaddr + data_offset, entry->offset,
+				&data_offset);
+		ibcmds += _ib_cmd_reg_to_mem(adreno_dev, ibcmds,
+				gpuaddr + data_offset, entry->offset_hi,
+				&data_offset);
+
+		/* skip over post_ib counter data */
+		data_offset += sizeof(unsigned int) * 2;
+	}
+
+	/* end of profile identifier */
+	ibcmds += _ib_end(adreno_dev, ibcmds);
+
+	return _create_ib_ref(adreno_dev, &profile->shared_buffer, rbcmds,
+			ibcmds - start, ib_offset * sizeof(unsigned int));
+}
+
+static int _build_post_ib_cmds(struct adreno_device *adreno_dev,
+		struct adreno_profile *profile,
+		unsigned int *rbcmds, unsigned int head)
+{
+	struct adreno_profile_assigns_list *entry;
+	unsigned int *start, *ibcmds;
+	unsigned int count = profile->assignment_count;
+	uint64_t gpuaddr =  profile->shared_buffer.gpuaddr;
+	unsigned int ib_offset = head + SIZE_DATA(count) + SIZE_PREIB(count);
+	unsigned int data_offset = head * sizeof(unsigned int);
+
+	ibcmds = ib_offset + ((unsigned int *) profile->shared_buffer.hostptr);
+	start = ibcmds;
+	/* start of profile identifier */
+	ibcmds += _ib_start(adreno_dev, ibcmds);
+
+	/* skip over pre_ib preamble */
+	data_offset += sizeof(unsigned int) * 6;
+
+	/* loop for each countable assigned */
+	list_for_each_entry(entry, &profile->assignments_list, list) {
+		/* skip over pre_ib counter data */
+		data_offset += sizeof(unsigned int) * 3;
+		ibcmds += _ib_cmd_reg_to_mem(adreno_dev, ibcmds,
+				gpuaddr + data_offset, entry->offset,
+				&data_offset);
+		ibcmds += _ib_cmd_reg_to_mem(adreno_dev, ibcmds,
+				gpuaddr + data_offset, entry->offset_hi,
+				&data_offset);
+	}
+
+	/* end of profile identifier */
+	ibcmds += _ib_end(adreno_dev, ibcmds);
+
+	return _create_ib_ref(adreno_dev, &profile->shared_buffer, rbcmds,
+			ibcmds - start, ib_offset * sizeof(unsigned int));
+}
+
+static bool shared_buf_empty(struct adreno_profile *profile)
+{
+	if (profile->shared_buffer.hostptr == NULL ||
+			profile->shared_buffer.size == 0)
+		return true;
+
+	if (profile->shared_head == profile->shared_tail)
+		return true;
+
+	return false;
+}
+
+static inline void shared_buf_inc(unsigned int max_size,
+		unsigned int *offset, size_t inc)
+{
+	*offset = (*offset + inc) % max_size;
+}
+
+static inline void log_buf_wrapcnt(unsigned int cnt, uintptr_t *off)
+{
+	*off = (*off + cnt) % ADRENO_PROFILE_LOG_BUF_SIZE_DWORDS;
+}
+
+static inline void log_buf_wrapinc_len(unsigned int *profile_log_buffer,
+		unsigned int **ptr, unsigned int len)
+{
+	*ptr += len;
+	if (*ptr >= (profile_log_buffer +
+				ADRENO_PROFILE_LOG_BUF_SIZE_DWORDS))
+		*ptr -= ADRENO_PROFILE_LOG_BUF_SIZE_DWORDS;
+}
+
+static inline void log_buf_wrapinc(unsigned int *profile_log_buffer,
+		unsigned int **ptr)
+{
+	log_buf_wrapinc_len(profile_log_buffer, ptr, 1);
+}
+
+static inline unsigned int log_buf_available(struct adreno_profile *profile,
+		unsigned int *head_ptr)
+{
+	uintptr_t tail, head;
+
+	tail = (uintptr_t) profile->log_tail -
+		(uintptr_t) profile->log_buffer;
+	head = (uintptr_t)head_ptr - (uintptr_t) profile->log_buffer;
+	if (tail > head)
+		return (tail - head) / sizeof(uintptr_t);
+	else
+		return ADRENO_PROFILE_LOG_BUF_SIZE_DWORDS - ((head - tail) /
+				sizeof(uintptr_t));
+}
+
+static inline unsigned int shared_buf_available(struct adreno_profile *profile)
+{
+	if (profile->shared_tail > profile->shared_head)
+		return profile->shared_tail - profile->shared_head;
+	else
+		return profile->shared_size -
+			(profile->shared_head - profile->shared_tail);
+}
+
+static struct adreno_profile_assigns_list *_find_assignment_by_offset(
+		struct adreno_profile *profile, unsigned int offset)
+{
+	struct adreno_profile_assigns_list *entry;
+
+	list_for_each_entry(entry, &profile->assignments_list, list) {
+		if (entry->offset == offset)
+			return entry;
+	}
+
+	return NULL;
+}
+
+static bool _in_assignments_list(struct adreno_profile *profile,
+		unsigned int groupid, unsigned int countable)
+{
+	struct adreno_profile_assigns_list *entry;
+
+	list_for_each_entry(entry, &profile->assignments_list, list) {
+		if (entry->groupid == groupid && entry->countable ==
+				countable)
+			return true;
+	}
+
+	return false;
+}
+
+static bool _add_to_assignments_list(struct adreno_profile *profile,
+		const char *str, unsigned int groupid, unsigned int countable,
+		unsigned int offset, unsigned int offset_hi)
+{
+	struct adreno_profile_assigns_list *entry;
+
+	/* first make sure we can alloc memory */
+	entry = kmalloc(sizeof(struct adreno_profile_assigns_list), GFP_KERNEL);
+	if (!entry)
+		return false;
+
+	list_add_tail(&entry->list, &profile->assignments_list);
+
+	entry->countable = countable;
+	entry->groupid = groupid;
+	entry->offset = offset;
+	entry->offset_hi = offset_hi;
+
+	strlcpy(entry->name, str, sizeof(entry->name));
+
+	profile->assignment_count++;
+
+	return true;
+}
+
+static bool results_available(struct adreno_device *adreno_dev,
+		struct adreno_profile *profile, unsigned int *shared_buf_tail)
+{
+	unsigned int global_eop;
+	unsigned int off = profile->shared_tail;
+	unsigned int *shared_ptr = (unsigned int *)
+		profile->shared_buffer.hostptr;
+	unsigned int ts, cnt;
+	int ts_cmp;
+
+	/*
+	 * If the shared buffer is empty, or the memstore EOP timestamp is
+	 * behind the outstanding counter buffer timestamps, then no results
+	 * are available
+	 */
+	if (shared_buf_empty(profile))
+		return false;
+
+	if (adreno_rb_readtimestamp(adreno_dev,
+			adreno_dev->cur_rb,
+			KGSL_TIMESTAMP_RETIRED, &global_eop))
+		return false;
+	do {
+		cnt = *(shared_ptr + off + 1);
+		if (cnt == 0)
+			return false;
+
+		ts = *(shared_ptr + off);
+		ts_cmp = timestamp_cmp(ts, global_eop);
+		if (ts_cmp >= 0) {
+			*shared_buf_tail = off;
+			if (off == profile->shared_tail)
+				return false;
+			else
+				return true;
+		}
+		shared_buf_inc(profile->shared_size, &off,
+				SIZE_SHARED_ENTRY(cnt));
+	} while (off != profile->shared_head);
+
+	*shared_buf_tail = profile->shared_head;
+
+	return true;
+}
+
+static void transfer_results(struct adreno_profile *profile,
+		unsigned int shared_buf_tail)
+{
+	unsigned int buf_off;
+	unsigned int ts, cnt, ctxt_id, pid, tid, client_type;
+	unsigned int *ptr = (unsigned int *) profile->shared_buffer.hostptr;
+	unsigned int *log_ptr, *log_base;
+	struct adreno_profile_assigns_list *assigns_list;
+	int i, tmp_tail;
+
+	log_ptr = profile->log_head;
+	log_base = profile->log_buffer;
+	if (log_ptr == NULL)
+		return;
+
+	/*
+	 * Go through the counter buffers and format them for writing into
+	 * log_buffer.  If the log buffer doesn't have space, just overwrite
+	 * it circularly.  shared_buf is guaranteed to not wrap within an
+	 * entry, so a plain pointer increment can be used.
+	 */
+	while (profile->shared_tail != shared_buf_tail) {
+		buf_off = profile->shared_tail;
+		/*
+		 * entry format: timestamp, count, context_id, pid, tid,
+		 * client type, followed by per-counter entries of
+		 * register offset, start lo/hi, end lo/hi
+		 */
+		ts = *(ptr + buf_off++);
+		cnt = *(ptr + buf_off++);
+		ctxt_id = *(ptr + buf_off++);
+		pid = *(ptr + buf_off++);
+		tid = *(ptr + buf_off++);
+		client_type = *(ptr + buf_off++);
+
+		/*
+		 * if entry overwrites the tail of log_buffer then adjust tail
+		 * ptr to make room for the new entry, discarding old entry
+		 */
+		while (log_buf_available(profile, log_ptr) <=
+				SIZE_LOG_ENTRY(cnt)) {
+			unsigned int size_tail;
+			uintptr_t boff;
+
+			size_tail = SIZE_LOG_ENTRY(0xffff &
+					*(profile->log_tail));
+			boff = ((uintptr_t) profile->log_tail -
+				(uintptr_t) log_base) / sizeof(uintptr_t);
+			log_buf_wrapcnt(size_tail, &boff);
+			profile->log_tail = log_base + boff;
+		}
+
+		*log_ptr = cnt;
+		log_buf_wrapinc(log_base, &log_ptr);
+		*log_ptr = client_type;
+		log_buf_wrapinc(log_base, &log_ptr);
+		*log_ptr = pid;
+		log_buf_wrapinc(log_base, &log_ptr);
+		*log_ptr = tid;
+		log_buf_wrapinc(log_base, &log_ptr);
+		*log_ptr = ctxt_id;
+		log_buf_wrapinc(log_base, &log_ptr);
+		*log_ptr = ts;
+		log_buf_wrapinc(log_base, &log_ptr);
+
+		for (i = 0; i < cnt; i++) {
+			assigns_list = _find_assignment_by_offset(
+					profile, *(ptr + buf_off++));
+			if (assigns_list == NULL) {
+				*log_ptr = (unsigned int) -1;
+
+				shared_buf_inc(profile->shared_size,
+					&profile->shared_tail,
+					SIZE_SHARED_ENTRY(cnt));
+				goto err;
+			} else {
+				*log_ptr = assigns_list->groupid << 16 |
+					(assigns_list->countable & 0xffff);
+			}
+			log_buf_wrapinc(log_base, &log_ptr);
+			*log_ptr = *(ptr + buf_off++);  /* perf cntr start lo */
+			log_buf_wrapinc(log_base, &log_ptr);
+			*log_ptr = *(ptr + buf_off++);  /* perf cntr start hi */
+			log_buf_wrapinc(log_base, &log_ptr);
+			*log_ptr = *(ptr + buf_off++);  /* perf cntr end lo */
+			log_buf_wrapinc(log_base, &log_ptr);
+			*log_ptr = *(ptr + buf_off++);  /* perf cntr end hi */
+			log_buf_wrapinc(log_base, &log_ptr);
+
+		}
+
+		tmp_tail = profile->shared_tail;
+		shared_buf_inc(profile->shared_size,
+				&profile->shared_tail,
+				SIZE_SHARED_ENTRY(cnt));
+		/*
+		 * Possibly lost some room as we cycled around, so it's safe to
+		 * reset the max size
+		 */
+		if (profile->shared_tail < tmp_tail)
+			profile->shared_size =
+				ADRENO_PROFILE_SHARED_BUF_SIZE_DWORDS;
+
+	}
+	profile->log_head = log_ptr;
+	return;
+err:
+	/* on error, reset the head to the tail in hopes we work correctly later */
+	profile->log_head = profile->log_tail;
+}
+
+static int profile_enable_get(void *data, u64 *val)
+{
+	struct kgsl_device *device = data;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	mutex_lock(&device->mutex);
+	*val = adreno_profile_enabled(&adreno_dev->profile);
+	mutex_unlock(&device->mutex);
+
+	return 0;
+}
+
+static int profile_enable_set(void *data, u64 val)
+{
+	struct kgsl_device *device = data;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct adreno_profile *profile = &adreno_dev->profile;
+
+	mutex_lock(&device->mutex);
+
+	if (val && profile->log_buffer == NULL) {
+		/* allocate profile_log_buffer the first time enabled */
+		profile->log_buffer = vmalloc(ADRENO_PROFILE_LOG_BUF_SIZE);
+		if (profile->log_buffer == NULL) {
+			mutex_unlock(&device->mutex);
+			return -ENOMEM;
+		}
+		profile->log_tail = profile->log_buffer;
+		profile->log_head = profile->log_buffer;
+	}
+
+	profile->enabled = val;
+
+	mutex_unlock(&device->mutex);
+
+	return 0;
+}
+
+static ssize_t profile_assignments_read(struct file *filep,
+		char __user *ubuf, size_t max, loff_t *ppos)
+{
+	struct kgsl_device *device = (struct kgsl_device *) filep->private_data;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct adreno_profile *profile = &adreno_dev->profile;
+	struct adreno_profile_assigns_list *entry;
+	int len = 0, max_size = PAGE_SIZE;
+	char *buf, *pos;
+	ssize_t size = 0;
+
+	mutex_lock(&device->mutex);
+
+	if (profile->assignment_count == 0) {
+		mutex_unlock(&device->mutex);
+		return 0;
+	}
+
+	buf = kmalloc(max_size, GFP_KERNEL);
+	if (!buf) {
+		mutex_unlock(&device->mutex);
+		return -ENOMEM;
+	}
+
+	pos = buf;
+
+	/* copy all assignments from the list to the string */
+	list_for_each_entry(entry, &profile->assignments_list, list) {
+		len = snprintf(pos, max_size, ASSIGNS_STR_FORMAT,
+				entry->name, entry->countable);
+
+		max_size -= len;
+		pos += len;
+	}
+
+	size = simple_read_from_buffer(ubuf, max, ppos, buf,
+			strlen(buf));
+
+	kfree(buf);
+
+	mutex_unlock(&device->mutex);
+	return size;
+}
+
+static void _remove_assignment(struct adreno_device *adreno_dev,
+		unsigned int groupid, unsigned int countable)
+{
+	struct adreno_profile *profile = &adreno_dev->profile;
+	struct adreno_profile_assigns_list *entry, *tmp;
+
+	list_for_each_entry_safe(entry, tmp, &profile->assignments_list, list) {
+		if (entry->groupid == groupid &&
+				entry->countable == countable) {
+			list_del(&entry->list);
+
+			profile->assignment_count--;
+
+			kfree(entry);
+
+			/* remove from perf counter allocation */
+			adreno_perfcounter_put(adreno_dev, groupid, countable,
+					PERFCOUNTER_FLAG_KERNEL);
+		}
+	}
+}
+
+static void _add_assignment(struct adreno_device *adreno_dev,
+		unsigned int groupid, unsigned int countable)
+{
+	struct adreno_profile *profile = &adreno_dev->profile;
+	unsigned int offset, offset_hi;
+	const char *name = NULL;
+
+	name = adreno_perfcounter_get_name(adreno_dev, groupid);
+	if (!name)
+		return;
+
+	/* if already in assigned list skip it */
+	if (_in_assignments_list(profile, groupid, countable))
+		return;
+
+	/* add to perf counter allocation, if fail skip it */
+	if (adreno_perfcounter_get(adreno_dev, groupid, countable,
+				&offset, &offset_hi, PERFCOUNTER_FLAG_NONE))
+		return;
+
+	/* add to assignments list, put counter back if error */
+	if (!_add_to_assignments_list(profile, name, groupid,
+				countable, offset, offset_hi))
+		adreno_perfcounter_put(adreno_dev, groupid,
+				countable, PERFCOUNTER_FLAG_KERNEL);
+}
+
+static char *_parse_next_assignment(struct adreno_device *adreno_dev,
+		char *str, int *groupid, int *countable, bool *remove)
+{
+	char *groupid_str, *countable_str, *next_str = NULL;
+	int ret;
+
+	*groupid = -EINVAL;
+	*countable = -EINVAL;
+	*remove = false;
+
+	/* remove spaces */
+	while (*str == ' ')
+		str++;
+
+	/* check if it's a remove assignment */
+	if (*str == '-') {
+		*remove = true;
+		str++;
+	}
+
+	/* get the groupid string */
+	groupid_str = str;
+	while (*str != ':') {
+		if (*str == '\0')
+			return NULL;
+		*str = tolower(*str);
+		str++;
+	}
+	if (groupid_str == str)
+		return NULL;
+
+	*str = '\0';
+	str++;
+
+	/* get the countable string */
+	countable_str = str;
+	while (*str != ' ' && *str != '\0')
+		str++;
+	if (countable_str == str)
+		return NULL;
+
+	/*
+	 * If we have reached the end of the original string then make sure we
+	 * return NULL from this function or we could accidentally overrun
+	 */
+
+	if (*str != '\0') {
+		*str = '\0';
+		next_str = str + 1;
+	}
+
+	/* set results */
+	*groupid = adreno_perfcounter_get_groupid(adreno_dev,
+			groupid_str);
+	if (*groupid < 0)
+		return NULL;
+	ret = kstrtou32(countable_str, 10, countable);
+	if (ret)
+		return NULL;
+
+	return next_str;
+}
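+
+/*
+ * Illustrative sketch: the writer below expects space-separated
+ * "<group>:<countable>" tokens, with a leading '-' to remove an assignment.
+ * Assuming the target exposes "sp" and "tp" groups, the input
+ *
+ *	"sp:7 -tp:3"
+ *
+ * adds countable 7 of the SP group and removes countable 3 of the TP group
+ * (group names are lowercased before matching).
+ */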
+
+static ssize_t profile_assignments_write(struct file *filep,
+		const char __user *user_buf, size_t len, loff_t *off)
+{
+	struct kgsl_device *device = (struct kgsl_device *) filep->private_data;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct adreno_profile *profile = &adreno_dev->profile;
+	size_t size = 0;
+	char *buf, *pbuf;
+	bool remove_assignment = false;
+	int groupid, countable, ret;
+
+	if (len >= PAGE_SIZE || len == 0)
+		return -EINVAL;
+
+	buf = kmalloc(len + 1, GFP_KERNEL);
+	if (buf == NULL)
+		return -ENOMEM;
+
+	if (copy_from_user(buf, user_buf, len)) {
+		size = -EFAULT;
+		goto error_free;
+	}
+
+	mutex_lock(&device->mutex);
+
+	if (adreno_profile_enabled(profile)) {
+		size = -EINVAL;
+		goto error_unlock;
+	}
+
+	ret = kgsl_active_count_get(device);
+	if (ret) {
+		size = ret;
+		goto error_unlock;
+	}
+
+	/*
+	 * When adding/removing assignments, ensure that the GPU is done with
+	 * all its work.  This helps to synchronize the work flow to the
+	 * GPU and avoid racy conditions.
+	 */
+	if (adreno_idle(device)) {
+		size = -ETIMEDOUT;
+		goto error_put;
+	}
+
+	/* clear all shared buffer results */
+	adreno_profile_process_results(adreno_dev);
+
+	pbuf = buf;
+
+	/* clear the log buffer */
+	if (profile->log_buffer != NULL) {
+		profile->log_head = profile->log_buffer;
+		profile->log_tail = profile->log_buffer;
+	}
+
+	/* for sanity and parsing, ensure it is null terminated */
+	buf[len] = '\0';
+
+	/* parse file buf and add(remove) to(from) appropriate lists */
+	while (pbuf) {
+		pbuf = _parse_next_assignment(adreno_dev, pbuf, &groupid,
+				&countable, &remove_assignment);
+		if (groupid < 0 || countable < 0)
+			break;
+
+		if (remove_assignment)
+			_remove_assignment(adreno_dev, groupid, countable);
+		else
+			_add_assignment(adreno_dev, groupid, countable);
+	}
+
+	size = len;
+
+error_put:
+	kgsl_active_count_put(device);
+error_unlock:
+	mutex_unlock(&device->mutex);
+error_free:
+	kfree(buf);
+	return size;
+}
+
+static int _pipe_print_pending(char __user *ubuf, size_t max)
+{
+	loff_t unused = 0;
+	char str[] = "Operation Would Block!";
+
+	return simple_read_from_buffer(ubuf, max,
+			&unused, str, strlen(str));
+}
+
+static int _pipe_print_results(struct adreno_device *adreno_dev,
+		char __user *ubuf, size_t max)
+{
+	struct adreno_profile *profile = &adreno_dev->profile;
+	const char *grp_name;
+	char __user *usr_buf = ubuf;
+	unsigned int *log_ptr = NULL, *tmp_log_ptr = NULL;
+	int len, i;
+	int status = 0;
+	ssize_t size, total_size = 0;
+	unsigned int cnt, api_type, ctxt_id, pid, tid, ts, cnt_reg;
+	unsigned long long pc_start, pc_end;
+	const char *api_str;
+	char format_space;
+	loff_t unused = 0;
+	char pipe_hdr_buf[51];   /* 4 uint32 + 5 space + 5 API type + '\0' */
+	char pipe_cntr_buf[63];  /* 2 uint64 + 1 uint32 + 4 spaces + 8 group */
+
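+	/*
+	 * Each log entry is laid out as: counter count, API type, pid, tid,
+	 * context id, timestamp, then for each counter a packed
+	 * (groupid << 16 | register offset) word followed by the start and
+	 * end counter values as lo/hi pairs.
+	 */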
+	/* convert unread entries to ASCII, copy to user-space */
+	log_ptr = profile->log_tail;
+
+	do {
+		/* store the tmp var for error cases so we can skip */
+		tmp_log_ptr = log_ptr;
+
+		/* Too many to output to pipe, so skip this data */
+		cnt = *log_ptr;
+		log_buf_wrapinc(profile->log_buffer, &log_ptr);
+
+		if (SIZE_PIPE_ENTRY(cnt) > max) {
+			log_buf_wrapinc_len(profile->log_buffer,
+				&tmp_log_ptr, SIZE_PIPE_ENTRY(cnt));
+			log_ptr = tmp_log_ptr;
+			goto done;
+		}
+
+		/*
+		 * Not enough space left in pipe, return without doing
+		 * anything
+		 */
+		if ((max - (usr_buf - ubuf)) < SIZE_PIPE_ENTRY(cnt)) {
+			log_ptr = tmp_log_ptr;
+			goto done;
+		}
+
+		api_type = *log_ptr;
+		api_str = get_api_type_str(api_type);
+		log_buf_wrapinc(profile->log_buffer, &log_ptr);
+		pid = *log_ptr;
+		log_buf_wrapinc(profile->log_buffer, &log_ptr);
+		tid = *log_ptr;
+		log_buf_wrapinc(profile->log_buffer, &log_ptr);
+		ctxt_id =  *log_ptr;
+		log_buf_wrapinc(profile->log_buffer, &log_ptr);
+		ts = *log_ptr;
+		log_buf_wrapinc(profile->log_buffer, &log_ptr);
+		len = snprintf(pipe_hdr_buf, sizeof(pipe_hdr_buf) - 1,
+				"%u %u %u %.5s %u ",
+				pid, tid, ctxt_id, api_str, ts);
+		size = simple_read_from_buffer(usr_buf,
+				max - (usr_buf - ubuf),
+				&unused, pipe_hdr_buf, len);
+
+		/* non-fatal error, so skip rest of entry and return */
+		if (size < 0) {
+			log_buf_wrapinc_len(profile->log_buffer,
+				&tmp_log_ptr, SIZE_PIPE_ENTRY(cnt));
+			log_ptr = tmp_log_ptr;
+			goto done;
+		}
+
+		unused = 0;
+		usr_buf += size;
+		total_size += size;
+
+		for (i = 0; i < cnt; i++) {
+			unsigned int start_lo, start_hi;
+			unsigned int end_lo, end_hi;
+
+			grp_name = adreno_perfcounter_get_name(
+					adreno_dev, (*log_ptr >> 16) & 0xffff);
+
+			/* non-fatal error, so skip rest of entry and return */
+			if (grp_name == NULL) {
+				log_buf_wrapinc_len(profile->log_buffer,
+					&tmp_log_ptr, SIZE_PIPE_ENTRY(cnt));
+				log_ptr = tmp_log_ptr;
+				goto done;
+			}
+
+			if (i == cnt - 1)
+				format_space = '\n';
+			else
+				format_space = ' ';
+
+			cnt_reg = *log_ptr & 0xffff;
+			log_buf_wrapinc(profile->log_buffer, &log_ptr);
+			start_lo = *log_ptr;
+			log_buf_wrapinc(profile->log_buffer, &log_ptr);
+			start_hi = *log_ptr;
+			log_buf_wrapinc(profile->log_buffer, &log_ptr);
+			end_lo = *log_ptr;
+			log_buf_wrapinc(profile->log_buffer, &log_ptr);
+			end_hi = *log_ptr;
+			log_buf_wrapinc(profile->log_buffer, &log_ptr);
+
+			pc_start = (((uint64_t) start_hi) << 32) | start_lo;
+			pc_end = (((uint64_t) end_hi) << 32) | end_lo;
+
+			len = snprintf(pipe_cntr_buf,
+					sizeof(pipe_cntr_buf) - 1,
+					"%.8s:%u %llu %llu%c",
+					grp_name, cnt_reg, pc_start,
+					pc_end, format_space);
+
+			size = simple_read_from_buffer(usr_buf,
+					max - (usr_buf - ubuf),
+					&unused, pipe_cntr_buf, len);
+
+			/* non-fatal error, so skip rest of entry and return */
+			if (size < 0) {
+				log_buf_wrapinc_len(profile->log_buffer,
+					&tmp_log_ptr, SIZE_PIPE_ENTRY(cnt));
+				log_ptr = tmp_log_ptr;
+				goto done;
+			}
+			unused = 0;
+			usr_buf += size;
+			total_size += size;
+		}
+	} while (log_ptr != profile->log_head);
+
+done:
+	status = total_size;
+	profile->log_tail = log_ptr;
+
+	return status;
+}
+
+static ssize_t profile_pipe_print(struct file *filep, char __user *ubuf,
+		size_t max, loff_t *ppos)
+{
+	struct kgsl_device *device = (struct kgsl_device *) filep->private_data;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct adreno_profile *profile = &adreno_dev->profile;
+	char __user *usr_buf = ubuf;
+	int status = 0;
+
+	/*
+	 * This file is not seekable since it only supports streaming, so
+	 * ignore ppos != 0.
+	 */
+	/*
+	 * format <pid>  <tid> <context id> <cnt<<16 | client type> <timestamp>
+	 * for each perf counter <cntr_reg_off> <start hi & lo> <end hi & low>
+	 */
+
+	mutex_lock(&device->mutex);
+
+	while (1) {
+		/* process any results that are available into the log_buffer */
+		status = adreno_profile_process_results(adreno_dev);
+		if (status > 0) {
+			/* if we have results, print them and exit */
+			status = _pipe_print_results(adreno_dev, usr_buf, max);
+			break;
+		}
+
+		/* there are no unread results, act accordingly */
+		if (filep->f_flags & O_NONBLOCK) {
+			if (profile->shared_tail != profile->shared_head) {
+				status = _pipe_print_pending(usr_buf, max);
+				break;
+			}
+
+			status = 0;
+			break;
+		}
+
+		mutex_unlock(&device->mutex);
+		set_current_state(TASK_INTERRUPTIBLE);
+		schedule_timeout(msecs_to_jiffies(100));
+		mutex_lock(&device->mutex);
+
+		if (signal_pending(current)) {
+			status = 0;
+			break;
+		}
+	}
+
+	mutex_unlock(&device->mutex);
+
+	return status;
+}
+
+static int profile_groups_print(struct seq_file *s, void *unused)
+{
+	struct kgsl_device *device = (struct kgsl_device *) s->private;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	struct adreno_perfcounters *counters = gpudev->perfcounters;
+	struct adreno_perfcount_group *group;
+	int i, j, used;
+
+	mutex_lock(&device->mutex);
+
+	for (i = 0; i < counters->group_count; ++i) {
+		group = &(counters->groups[i]);
+		/* get number of counters used for this group */
+		used = 0;
+		for (j = 0; j < group->reg_count; j++) {
+			if (group->regs[j].countable !=
+					KGSL_PERFCOUNTER_NOT_USED)
+				used++;
+		}
+
+		seq_printf(s, "%s %d %d\n", group->name,
+			group->reg_count, used);
+	}
+
+	mutex_unlock(&device->mutex);
+
+	return 0;
+}
+
+static int profile_groups_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, profile_groups_print, inode->i_private);
+}
+
+static const struct file_operations profile_groups_fops = {
+	.owner = THIS_MODULE,
+	.open = profile_groups_open,
+	.read = seq_read,
+	.llseek = noop_llseek,
+	.release = single_release,
+};
+
+static const struct file_operations profile_pipe_fops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = profile_pipe_print,
+	.llseek = noop_llseek,
+};
+
+static const struct file_operations profile_assignments_fops = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = profile_assignments_read,
+	.write = profile_assignments_write,
+	.llseek = noop_llseek,
+};
+
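+/* Simple attribute fops backing the debugfs "enable" file */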
+DEFINE_SIMPLE_ATTRIBUTE(profile_enable_fops,
+			profile_enable_get,
+			profile_enable_set, "%llu\n");
+
+void adreno_profile_init(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_profile *profile = &adreno_dev->profile;
+	struct dentry *profile_dir;
+	int ret;
+
+	profile->enabled = false;
+
+	/* allocate shared_buffer, which includes pre_ib and post_ib */
+	profile->shared_size = ADRENO_PROFILE_SHARED_BUF_SIZE_DWORDS;
+	ret = kgsl_allocate_global(device, &profile->shared_buffer,
+			profile->shared_size * sizeof(unsigned int),
+			0, 0, "profile");
+
+	if (ret) {
+		profile->shared_size = 0;
+		return;
+	}
+
+	INIT_LIST_HEAD(&profile->assignments_list);
+
+	/* Create perf counter debugfs */
+	profile_dir = debugfs_create_dir("profiling", device->d_debugfs);
+	if (IS_ERR(profile_dir))
+		return;
+
+	debugfs_create_file("enable",  0644, profile_dir, device,
+			&profile_enable_fops);
+	debugfs_create_file("blocks", 0444, profile_dir, device,
+			&profile_groups_fops);
+	debugfs_create_file("pipe", 0444, profile_dir, device,
+			&profile_pipe_fops);
+	debugfs_create_file("assignments", 0644, profile_dir, device,
+			&profile_assignments_fops);
+}
+
+void adreno_profile_close(struct adreno_device *adreno_dev)
+{
+	struct adreno_profile *profile = &adreno_dev->profile;
+	struct adreno_profile_assigns_list *entry, *tmp;
+
+	profile->enabled = false;
+	vfree(profile->log_buffer);
+	profile->log_buffer = NULL;
+	profile->log_head = NULL;
+	profile->log_tail = NULL;
+	profile->shared_head = 0;
+	profile->shared_tail = 0;
+	kgsl_free_global(KGSL_DEVICE(adreno_dev), &profile->shared_buffer);
+	profile->shared_size = 0;
+
+	profile->assignment_count = 0;
+
+	list_for_each_entry_safe(entry, tmp, &profile->assignments_list, list) {
+		list_del(&entry->list);
+		kfree(entry);
+	}
+}
+
+int adreno_profile_process_results(struct adreno_device *adreno_dev)
+{
+	struct adreno_profile *profile = &adreno_dev->profile;
+	unsigned int shared_buf_tail = profile->shared_tail;
+
+	if (!results_available(adreno_dev, profile, &shared_buf_tail))
+		return 0;
+
+	/*
+	 * transfer retired results to log_buffer
+	 * update shared_buffer tail ptr
+	 */
+	transfer_results(profile, shared_buf_tail);
+
+	return 1;
+}
+
+void adreno_profile_preib_processing(struct adreno_device *adreno_dev,
+		struct adreno_context *drawctxt, unsigned int *cmd_flags,
+		unsigned int **rbptr)
+{
+	struct adreno_profile *profile = &adreno_dev->profile;
+	int count = profile->assignment_count;
+	unsigned int entry_head = profile->shared_head;
+	unsigned int *shared_ptr;
+	struct adreno_ringbuffer *rb = ADRENO_CURRENT_RINGBUFFER(adreno_dev);
+	unsigned int rbcmds[4];
+	unsigned int *ptr = *rbptr;
+	unsigned int i, ret = 0;
+
+	*cmd_flags &= ~KGSL_CMD_FLAGS_PROFILE;
+
+	if (!adreno_profile_assignments_ready(profile))
+		goto done;
+
+	/*
+	 * Check if space is available; include the post_ib in the check so we
+	 * don't have to undo the pre_ib insertion in the ringbuffer when only
+	 * the post_ib runs out of space.
+	 */
+	if (SIZE_SHARED_ENTRY(count) >= shared_buf_available(profile))
+		goto done;
+
+	if (entry_head + SIZE_SHARED_ENTRY(count) >= profile->shared_size) {
+		/* entry_head would wrap, start entry_head at 0 in buffer */
+		entry_head = 0;
+		profile->shared_size = profile->shared_head;
+		profile->shared_head = 0;
+
+		/* recheck space available */
+		if (SIZE_SHARED_ENTRY(count) >= shared_buf_available(profile))
+			goto done;
+	}
+
+	/* zero out the counter area of shared_buffer entry_head */
+	shared_ptr = entry_head + ((unsigned int *)
+			profile->shared_buffer.hostptr);
+	memset(shared_ptr, 0, SIZE_SHARED_ENTRY(count) * sizeof(unsigned int));
+
+	/* reserve space for the pre ib shared buffer */
+	shared_buf_inc(profile->shared_size, &profile->shared_head,
+			SIZE_SHARED_ENTRY(count));
+
+	/* create the shared ibdesc */
+	ret = _build_pre_ib_cmds(adreno_dev, profile, rbcmds, entry_head,
+			rb->timestamp + 1, drawctxt);
+
+	/* set flag to sync with post ib commands */
+	*cmd_flags |= KGSL_CMD_FLAGS_PROFILE;
+
+done:
+	/* write the ibdesc to the ringbuffer */
+	for (i = 0; i < ret; i++)
+		*ptr++ = rbcmds[i];
+
+	*rbptr = ptr;
+}
+
+void adreno_profile_postib_processing(struct adreno_device *adreno_dev,
+		unsigned int *cmd_flags, unsigned int **rbptr)
+{
+	struct adreno_profile *profile = &adreno_dev->profile;
+	int count = profile->assignment_count;
+	unsigned int entry_head = profile->shared_head -
+		SIZE_SHARED_ENTRY(count);
+	unsigned int *ptr = *rbptr;
+	unsigned int rbcmds[4];
+	int ret = 0, i;
+
+	if (!adreno_profile_assignments_ready(profile))
+		goto done;
+
+	if (!(*cmd_flags & KGSL_CMD_FLAGS_PROFILE))
+		goto done;
+
+	/* create the shared ibdesc */
+	ret = _build_post_ib_cmds(adreno_dev, profile, rbcmds, entry_head);
+
+done:
+	/* write the ibdesc to the ringbuffer */
+	for (i = 0; i < ret; i++)
+		*ptr++ = rbcmds[i];
+
+	*rbptr = ptr;
+
+	/* reset the sync flag */
+	*cmd_flags &= ~KGSL_CMD_FLAGS_PROFILE;
+}
+
diff --git a/drivers/gpu/msm/adreno_profile.h b/drivers/gpu/msm/adreno_profile.h
new file mode 100644
index 0000000..4d81abd
--- /dev/null
+++ b/drivers/gpu/msm/adreno_profile.h
@@ -0,0 +1,111 @@
+/* Copyright (c) 2013-2014, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef __ADRENO_PROFILE_H
+#define __ADRENO_PROFILE_H
+#include <linux/seq_file.h>
+
+/**
+ * struct adreno_profile_assigns_list: linked list for assigned perf counters
+ * @list: linkage for nodes in list
+ * @name: group name or GPU name
+ * @groupid: group id
+ * @countable: countable assigned to perfcounter
+ * @offset: perfcounter register address offset
+ */
+struct adreno_profile_assigns_list {
+	struct list_head list;
+	char name[25];
+	unsigned int groupid;
+	unsigned int countable;
+	unsigned int offset;    /* LO offset */
+	unsigned int offset_hi; /* HI offset */
+};
+
+struct adreno_profile {
+	struct list_head assignments_list; /* list of all assignments */
+	unsigned int assignment_count;  /* Number of assigned counters */
+	unsigned int *log_buffer;
+	unsigned int *log_head;
+	unsigned int *log_tail;
+	bool enabled;
+	/* counter, pre_ib, and post_ib held in one large circular buffer
+	 * shared between kgsl and GPU
+	 * counter entry 0
+	 * pre_ib entry 0
+	 * post_ib entry 0
+	 * ...
+	 * counter entry N
+	 * pre_ib entry N
+	 * post_ib entry N
+	 */
+	struct kgsl_memdesc shared_buffer;
+	unsigned int shared_head;
+	unsigned int shared_tail;
+	unsigned int shared_size;
+};
+
+#define ADRENO_PROFILE_SHARED_BUF_SIZE_DWORDS (48 * 4096 / sizeof(uint))
+/* Sized at 48 pages; allows for at least 50 outstanding IBs, 1755 max */
+
+#define ADRENO_PROFILE_LOG_BUF_SIZE  (1024 * 920)
+/* Sized for 1024 log entries with all 45 counters assigned; about 230 pages */
+#define ADRENO_PROFILE_LOG_BUF_SIZE_DWORDS  (ADRENO_PROFILE_LOG_BUF_SIZE / \
+						sizeof(unsigned int))
+
+#ifdef CONFIG_DEBUG_FS
+void adreno_profile_init(struct adreno_device *adreno_dev);
+void adreno_profile_close(struct adreno_device *adreno_dev);
+int adreno_profile_process_results(struct adreno_device *adreno_dev);
+void adreno_profile_preib_processing(struct adreno_device *adreno_dev,
+		struct adreno_context *drawctxt, unsigned int *cmd_flags,
+		unsigned int **rbptr);
+void adreno_profile_postib_processing(struct adreno_device *adreno_dev,
+		unsigned int *cmd_flags, unsigned int **rbptr);
+#else
+static inline void adreno_profile_init(struct adreno_device *adreno_dev) { }
+static inline void adreno_profile_close(struct adreno_device *adreno_dev) { }
+static inline int adreno_profile_process_results(
+		struct adreno_device *adreno_dev)
+{
+	return 0;
+}
+
+static inline void adreno_profile_preib_processing(
+		struct adreno_device *adreno_dev,
+		struct adreno_context *drawctxt, unsigned int *cmd_flags,
+		unsigned int **rbptr) { }
+
+static inline void adreno_profile_postib_processing(
+		struct adreno_device *adreno_dev,
+		unsigned int *cmd_flags, unsigned int **rbptr) { }
+#endif
+
+static inline bool adreno_profile_enabled(struct adreno_profile *profile)
+{
+	return profile->enabled;
+}
+
+static inline bool adreno_profile_has_assignments(
+	struct adreno_profile *profile)
+{
+	return list_empty(&profile->assignments_list) ? false : true;
+}
+
+static inline bool adreno_profile_assignments_ready(
+	struct adreno_profile *profile)
+{
+	return adreno_profile_enabled(profile) &&
+		adreno_profile_has_assignments(profile);
+}
+
+#endif
diff --git a/drivers/gpu/msm/adreno_ringbuffer.c b/drivers/gpu/msm/adreno_ringbuffer.c
new file mode 100644
index 0000000..2aa9b00
--- /dev/null
+++ b/drivers/gpu/msm/adreno_ringbuffer.c
@@ -0,0 +1,1053 @@
+/* Copyright (c) 2002,2007-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/log2.h>
+#include <linux/time.h>
+#include <linux/delay.h>
+
+#include "kgsl.h"
+#include "kgsl_sharedmem.h"
+#include "kgsl_trace.h"
+#include "kgsl_pwrctrl.h"
+
+#include "adreno.h"
+#include "adreno_iommu.h"
+#include "adreno_pm4types.h"
+#include "adreno_ringbuffer.h"
+
+#include "a3xx_reg.h"
+#include "adreno_a5xx.h"
+
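+/*
+ * Convert a dword offset within a ringbuffer into the corresponding host
+ * pointer (RB_HOSTPTR) or GPU address (RB_GPUADDR).
+ */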
+#define RB_HOSTPTR(_rb, _pos) \
+	((unsigned int *) ((_rb)->buffer_desc.hostptr + \
+		((_pos) * sizeof(unsigned int))))
+
+#define RB_GPUADDR(_rb, _pos) \
+	((_rb)->buffer_desc.gpuaddr + ((_pos) * sizeof(unsigned int)))
+
+static void adreno_get_submit_time(struct adreno_device *adreno_dev,
+		struct adreno_submit_time *time)
+{
+	unsigned long flags;
+	/*
+	 * Here we are attempting to create a mapping between the
+	 * GPU time domain (alwayson counter) and the CPU time domain
+	 * (local_clock) by sampling both values as close together as
+	 * possible. This is useful for many types of debugging and
+	 * profiling. In order to make this mapping as accurate as
+	 * possible, we must turn off interrupts to avoid running
+	 * interrupt handlers between the two samples.
+	 */
+
+	local_irq_save(flags);
+
+	/* Read always on registers */
+	if (!adreno_is_a3xx(adreno_dev)) {
+		adreno_readreg64(adreno_dev,
+			ADRENO_REG_RBBM_ALWAYSON_COUNTER_LO,
+			ADRENO_REG_RBBM_ALWAYSON_COUNTER_HI,
+			&time->ticks);
+
+		/* Mask hi bits as they may be incorrect on some targets */
+		if (ADRENO_GPUREV(adreno_dev) >= 400 &&
+				ADRENO_GPUREV(adreno_dev) <= ADRENO_REV_A530)
+			time->ticks &= 0xFFFFFFFF;
+	} else
+		time->ticks = 0;
+
+	/* Get the kernel clock for time since boot */
+	time->ktime = local_clock();
+
+	/* Get the timeofday for the wall time (for the user) */
+	getnstimeofday(&time->utime);
+
+	local_irq_restore(flags);
+}
+
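+/*
+ * Commit the pending write pointer to hardware: if no preemption is in
+ * flight and this ringbuffer is the one currently active on the GPU, write
+ * _wptr to the CP register; the shadow wptr is updated either way.
+ */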
+void adreno_ringbuffer_wptr(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&rb->preempt_lock, flags);
+	if (adreno_in_preempt_state(adreno_dev, ADRENO_PREEMPT_NONE)) {
+
+		if (adreno_dev->cur_rb == rb) {
+			/*
+			 * Let the pwrscale policy know that new commands have
+			 * been submitted.
+			 */
+			kgsl_pwrscale_busy(KGSL_DEVICE(adreno_dev));
+			adreno_writereg(adreno_dev, ADRENO_REG_CP_RB_WPTR,
+				rb->_wptr);
+		}
+	}
+
+	rb->wptr = rb->_wptr;
+	spin_unlock_irqrestore(&rb->preempt_lock, flags);
+}
+
+void adreno_ringbuffer_submit(struct adreno_ringbuffer *rb,
+		struct adreno_submit_time *time)
+{
+	struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
+
+	if (time != NULL)
+		adreno_get_submit_time(adreno_dev, time);
+
+	adreno_ringbuffer_wptr(adreno_dev, rb);
+}
+
+int adreno_ringbuffer_submit_spin(struct adreno_ringbuffer *rb,
+		struct adreno_submit_time *time, unsigned int timeout)
+{
+	struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
+
+	adreno_ringbuffer_submit(rb, time);
+	return adreno_spin_idle(adreno_dev, timeout);
+}
+
+unsigned int *adreno_ringbuffer_allocspace(struct adreno_ringbuffer *rb,
+		unsigned int dwords)
+{
+	struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
+	unsigned int rptr = adreno_get_rptr(rb);
+	unsigned int ret;
+
+	if (rptr <= rb->_wptr) {
+		unsigned int *cmds;
+
+		if (rb->_wptr + dwords <= (KGSL_RB_DWORDS - 2)) {
+			ret = rb->_wptr;
+			rb->_wptr = (rb->_wptr + dwords) % KGSL_RB_DWORDS;
+			return RB_HOSTPTR(rb, ret);
+		}
+
+		/*
+		 * There isn't enough space toward the end of ringbuffer. So
+		 * look for space from the beginning of ringbuffer upto the
+		 * read pointer.
+		 */
+		if (dwords < rptr) {
+			cmds = RB_HOSTPTR(rb, rb->_wptr);
+			*cmds = cp_packet(adreno_dev, CP_NOP,
+				KGSL_RB_DWORDS - rb->_wptr - 1);
+			rb->_wptr = dwords;
+			return RB_HOSTPTR(rb, 0);
+		}
+	}
+
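+	/*
+	 * Otherwise allocate only if the request fits strictly before rptr,
+	 * which keeps at least one free dword so wptr never catches up to
+	 * rptr.
+	 */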
+	if (rb->_wptr + dwords < rptr) {
+		ret = rb->_wptr;
+		rb->_wptr = (rb->_wptr + dwords) % KGSL_RB_DWORDS;
+		return RB_HOSTPTR(rb, ret);
+	}
+
+	return ERR_PTR(-ENOSPC);
+}
+
+/**
+ * adreno_ringbuffer_start() - Ringbuffer start
+ * @adreno_dev: Pointer to adreno device
+ * @start_type: Warm or cold start
+ */
+int adreno_ringbuffer_start(struct adreno_device *adreno_dev,
+	unsigned int start_type)
+{
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_ringbuffer *rb;
+	int i;
+
+	/* Setup the ringbuffers state before we start */
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i) {
+		kgsl_sharedmem_set(device, &(rb->buffer_desc),
+				0, 0xAA, KGSL_RB_SIZE);
+		kgsl_sharedmem_writel(device, &device->scratch,
+				SCRATCH_RPTR_OFFSET(rb->id), 0);
+		rb->wptr = 0;
+		rb->_wptr = 0;
+		rb->wptr_preempt_end = 0xFFFFFFFF;
+		rb->starve_timer_state =
+			ADRENO_DISPATCHER_RB_STARVE_TIMER_UNINIT;
+	}
+
+	/* The RB start sequence is GPU specific */
+	return gpudev->rb_start(adreno_dev, start_type);
+}
+
+void adreno_ringbuffer_stop(struct adreno_device *adreno_dev)
+{
+	struct adreno_ringbuffer *rb;
+	int i;
+
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i)
+		kgsl_cancel_events(KGSL_DEVICE(adreno_dev), &(rb->events));
+}
+
+static int _rb_readtimestamp(struct kgsl_device *device,
+		void *priv, enum kgsl_timestamp_type type,
+		unsigned int *timestamp)
+{
+	return adreno_rb_readtimestamp(ADRENO_DEVICE(device), priv, type,
+		timestamp);
+}
+
+static int _adreno_ringbuffer_probe(struct adreno_device *adreno_dev,
+		int id)
+{
+	struct adreno_ringbuffer *rb = &adreno_dev->ringbuffers[id];
+	int ret;
+	char name[64];
+
+	rb->id = id;
+
+	snprintf(name, sizeof(name), "rb_events-%d", id);
+	kgsl_add_event_group(&rb->events, NULL, name,
+		_rb_readtimestamp, rb);
+	rb->timestamp = 0;
+	init_waitqueue_head(&rb->ts_expire_waitq);
+
+	spin_lock_init(&rb->preempt_lock);
+
+	/*
+	 * Allocate mem for storing RB pagetables and commands to
+	 * switch pagetable
+	 */
+	ret = kgsl_allocate_global(KGSL_DEVICE(adreno_dev), &rb->pagetable_desc,
+		PAGE_SIZE, 0, KGSL_MEMDESC_PRIVILEGED, "pagetable_desc");
+	if (ret)
+		return ret;
+	return kgsl_allocate_global(KGSL_DEVICE(adreno_dev), &rb->buffer_desc,
+			KGSL_RB_SIZE, KGSL_MEMFLAGS_GPUREADONLY,
+			0, "ringbuffer");
+}
+
+int adreno_ringbuffer_probe(struct adreno_device *adreno_dev, bool nopreempt)
+{
+	int status = 0;
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	int i;
+
+	if (nopreempt == false && ADRENO_FEATURE(adreno_dev, ADRENO_PREEMPTION))
+		adreno_dev->num_ringbuffers = gpudev->num_prio_levels;
+	else
+		adreno_dev->num_ringbuffers = 1;
+
+	for (i = 0; i < adreno_dev->num_ringbuffers; i++) {
+		status = _adreno_ringbuffer_probe(adreno_dev, i);
+		if (status != 0)
+			break;
+	}
+
+	if (status)
+		adreno_ringbuffer_close(adreno_dev);
+	else
+		adreno_dev->cur_rb = &(adreno_dev->ringbuffers[0]);
+
+	return status;
+}
+
+static void _adreno_ringbuffer_close(struct adreno_device *adreno_dev,
+		struct adreno_ringbuffer *rb)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	kgsl_free_global(device, &rb->pagetable_desc);
+	kgsl_free_global(device, &rb->preemption_desc);
+
+	kgsl_free_global(device, &rb->buffer_desc);
+	kgsl_del_event_group(&rb->events);
+	memset(rb, 0, sizeof(struct adreno_ringbuffer));
+}
+
+void adreno_ringbuffer_close(struct adreno_device *adreno_dev)
+{
+	struct adreno_ringbuffer *rb;
+	int i;
+
+	FOR_EACH_RINGBUFFER(adreno_dev, rb, i)
+		_adreno_ringbuffer_close(adreno_dev, rb);
+}
+
+/*
+ * cp_secure_mode() - Put GPU in trusted mode
+ * @adreno_dev: Pointer to adreno device
+ * @cmds: Pointer to cmds to be put in the ringbuffer
+ * @set: 1 - secure mode, 0 - unsecure mode
+ *
+ * Add commands to the ringbuffer to put the GPU in secure mode
+ * or unsecure mode based on the variable set.
+ */
+int cp_secure_mode(struct adreno_device *adreno_dev, uint *cmds,
+				int set)
+{
+	uint *start = cmds;
+
+	if (adreno_is_a4xx(adreno_dev)) {
+		cmds += cp_wait_for_idle(adreno_dev, cmds);
+		/*
+		 * The two commands will stall the PFP until the PFP-ME-AHB
+		 * is drained and the GPU is idle. As soon as this happens,
+		 * the PFP will start moving again.
+		 */
+		cmds += cp_wait_for_me(adreno_dev, cmds);
+
+		/*
+		 * The commands below are processed by the ME. The GPU will be
+		 * idle when they are processed, but the PFP will continue
+		 * to fetch instructions at the same time.
+		 */
+		*cmds++ = cp_packet(adreno_dev, CP_SET_PROTECTED_MODE, 1);
+		*cmds++ = 0;
+		*cmds++ = cp_packet(adreno_dev, CP_WIDE_REG_WRITE, 2);
+		*cmds++ = adreno_getreg(adreno_dev,
+				ADRENO_REG_RBBM_SECVID_TRUST_CONTROL);
+		*cmds++ = set;
+		*cmds++ = cp_packet(adreno_dev, CP_SET_PROTECTED_MODE, 1);
+		*cmds++ = 1;
+
+		/* Stall PFP until all above commands are complete */
+		cmds += cp_wait_for_me(adreno_dev, cmds);
+	} else {
+		/*
+		 * A5xx has a separate opcode specifically to put the GPU
+		 * in and out of secure mode.
+		 */
+		*cmds++ = cp_packet(adreno_dev, CP_SET_SECURE_MODE, 1);
+		*cmds++ = set;
+	}
+
+	return cmds - start;
+}
+
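+/*
+ * Build a CP_MEM_WRITE packet that writes @value to @gpuaddr; returns the
+ * number of dwords added to @cmds.
+ */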
+static inline int cp_mem_write(struct adreno_device *adreno_dev,
+		unsigned int *cmds, uint64_t gpuaddr, unsigned int value)
+{
+	int dwords = 0;
+
+	cmds[dwords++] = cp_mem_packet(adreno_dev, CP_MEM_WRITE, 2, 1);
+	dwords += cp_gpuaddr(adreno_dev, &cmds[dwords], gpuaddr);
+	cmds[dwords++] = value;
+
+	return dwords;
+}
+
+static int
+adreno_ringbuffer_addcmds(struct adreno_ringbuffer *rb,
+				unsigned int flags, unsigned int *cmds,
+				unsigned int sizedwords, uint32_t timestamp,
+				struct adreno_submit_time *time)
+{
+	struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	unsigned int *ringcmds, *start;
+	unsigned int total_sizedwords = sizedwords;
+	unsigned int i;
+	unsigned int context_id = 0;
+	bool profile_ready;
+	struct adreno_context *drawctxt = rb->drawctxt_active;
+	struct kgsl_context *context = NULL;
+	bool secured_ctxt = false;
+	static unsigned int _seq_cnt;
+
+	if (drawctxt != NULL && kgsl_context_detached(&drawctxt->base) &&
+		!(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE))
+		return -ENOENT;
+
+	/* On fault return error so that we don't keep submitting */
+	if (adreno_gpu_fault(adreno_dev) != 0)
+		return -EPROTO;
+
+	rb->timestamp++;
+
+	/* If this is an internal IB, use the global timestamp for it */
+	if (!drawctxt || (flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE))
+		timestamp = rb->timestamp;
+	else {
+		context_id = drawctxt->base.id;
+		context = &drawctxt->base;
+	}
+
+	/*
+	 * Note that we cannot safely take drawctxt->mutex here without
+	 * potential mutex inversion with device->mutex which is held
+	 * here. As a result, any other code that accesses this variable
+	 * must also use device->mutex.
+	 */
+	if (drawctxt) {
+		drawctxt->internal_timestamp = rb->timestamp;
+		if (drawctxt->base.flags & KGSL_CONTEXT_SECURE)
+			secured_ctxt = true;
+	}
+
+	/*
+	 * If in-stream IB profiling is enabled and there are counters
+	 * assigned, then space needs to be reserved for profiling.  This
+	 * space in the ringbuffer is always consumed (it might be filled with
+	 * NOPs in the error case).  profile_ready needs to be consistent through
+	 * the _addcmds call since it is allocating additional ringbuffer
+	 * command space.
+	 */
+	profile_ready = drawctxt &&
+		adreno_profile_assignments_ready(&adreno_dev->profile) &&
+		!(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE);
+
+	/*
+	 * reserve space to temporarily turn off protected mode
+	 * error checking if needed
+	 */
+	total_sizedwords += flags & KGSL_CMD_FLAGS_PMODE ? 4 : 0;
+	/* 2 dwords to store the start of command sequence */
+	total_sizedwords += 2;
+	/* internal ib command identifier for the ringbuffer */
+	total_sizedwords += (flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE) ? 2 : 0;
+
+	total_sizedwords += (secured_ctxt) ? 26 : 0;
+
+	/* _seq mem write for each submission */
+	total_sizedwords += 4;
+
+	/* context rollover */
+	if (adreno_is_a3xx(adreno_dev))
+		total_sizedwords += 3;
+
+	/* For HLSQ updates below */
+	if (adreno_is_a4xx(adreno_dev) || adreno_is_a3xx(adreno_dev))
+		total_sizedwords += 4;
+
+	if (gpudev->preemption_pre_ibsubmit &&
+				adreno_is_preemption_enabled(adreno_dev))
+		total_sizedwords += 22;
+
+	if (gpudev->preemption_post_ibsubmit &&
+				adreno_is_preemption_enabled(adreno_dev))
+		total_sizedwords += 5;
+
+	/*
+	 * A5xx uses 64-bit memory addresses, so PM4 commands that read from or
+	 * write to memory take 4 bytes more than on A4xx.
+	 * This function is shared between gpucores, so reserve the max size
+	 * required in ringbuffer and adjust the write pointer depending on
+	 * gpucore at the end of this function.
+	 */
+	total_sizedwords += 8; /* sop timestamp */
+	total_sizedwords += 5; /* eop timestamp */
+
+	if (drawctxt && !(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE)) {
+		/* global timestamp without cache flush for non-zero context */
+		total_sizedwords += 4;
+	}
+
+	if (flags & KGSL_CMD_FLAGS_WFI)
+		total_sizedwords += 2; /* WFI */
+
+	if (profile_ready)
+		total_sizedwords += 8;   /* space for pre_ib and post_ib */
+
+	/* Add space for the power on shader fixup if we need it */
+	if (flags & KGSL_CMD_FLAGS_PWRON_FIXUP)
+		total_sizedwords += 9;
+
+	/*
+	 * WAIT_MEM_WRITES - needed in the stall on fault case
+	 * to prevent out of order CP operations that can result
+	 * in a CACHE_FLUSH_TS interrupt storm
+	 */
+	if (test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE,
+				&adreno_dev->ft_pf_policy))
+		total_sizedwords += 1;
+
+	ringcmds = adreno_ringbuffer_allocspace(rb, total_sizedwords);
+	if (IS_ERR(ringcmds))
+		return PTR_ERR(ringcmds);
+
+	start = ringcmds;
+
+	*ringcmds++ = cp_packet(adreno_dev, CP_NOP, 1);
+	*ringcmds++ = KGSL_CMD_IDENTIFIER;
+
+	if (adreno_is_preemption_enabled(adreno_dev) &&
+				gpudev->preemption_pre_ibsubmit)
+		ringcmds += gpudev->preemption_pre_ibsubmit(
+					adreno_dev, rb, ringcmds, context);
+
+	if (flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE) {
+		*ringcmds++ = cp_packet(adreno_dev, CP_NOP, 1);
+		*ringcmds++ = KGSL_CMD_INTERNAL_IDENTIFIER;
+	}
+
+	if (flags & KGSL_CMD_FLAGS_PWRON_FIXUP) {
+		/* Disable protected mode for the fixup */
+		*ringcmds++ = cp_packet(adreno_dev, CP_SET_PROTECTED_MODE, 1);
+		*ringcmds++ = 0;
+
+		*ringcmds++ = cp_packet(adreno_dev, CP_NOP, 1);
+		*ringcmds++ = KGSL_PWRON_FIXUP_IDENTIFIER;
+		*ringcmds++ = cp_mem_packet(adreno_dev,
+				CP_INDIRECT_BUFFER_PFE, 2, 1);
+		ringcmds += cp_gpuaddr(adreno_dev, ringcmds,
+				adreno_dev->pwron_fixup.gpuaddr);
+		*ringcmds++ = adreno_dev->pwron_fixup_dwords;
+
+		/* Re-enable protected mode */
+		*ringcmds++ = cp_packet(adreno_dev, CP_SET_PROTECTED_MODE, 1);
+		*ringcmds++ = 1;
+	}
+
+	/* Add any IB required for profiling if it is enabled */
+	if (profile_ready)
+		adreno_profile_preib_processing(adreno_dev, drawctxt,
+				&flags, &ringcmds);
+
+	/* start-of-pipeline timestamp for the context */
+	if (drawctxt && !(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE))
+		ringcmds += cp_mem_write(adreno_dev, ringcmds,
+			MEMSTORE_ID_GPU_ADDR(device, context_id, soptimestamp),
+			timestamp);
+
+	/* start-of-pipeline timestamp for the ringbuffer */
+	ringcmds += cp_mem_write(adreno_dev, ringcmds,
+		MEMSTORE_RB_GPU_ADDR(device, rb, soptimestamp), rb->timestamp);
+
+	if (secured_ctxt)
+		ringcmds += cp_secure_mode(adreno_dev, ringcmds, 1);
+
+	if (flags & KGSL_CMD_FLAGS_PMODE) {
+		/* disable protected mode error checking */
+		*ringcmds++ = cp_packet(adreno_dev, CP_SET_PROTECTED_MODE, 1);
+		*ringcmds++ = 0;
+	}
+
+	for (i = 0; i < sizedwords; i++)
+		*ringcmds++ = cmds[i];
+
+	if (flags & KGSL_CMD_FLAGS_PMODE) {
+		/* re-enable protected mode error checking */
+		*ringcmds++ = cp_packet(adreno_dev, CP_SET_PROTECTED_MODE, 1);
+		*ringcmds++ = 1;
+	}
+
+	/*
+	 * Flush HLSQ lazy updates to make sure there are no
+	 * resources pending for indirect loads after the timestamp
+	 */
+	if (adreno_is_a4xx(adreno_dev) || adreno_is_a3xx(adreno_dev)) {
+		*ringcmds++ = cp_packet(adreno_dev, CP_EVENT_WRITE, 1);
+		*ringcmds++ = 0x07; /* HLSQ_FLUSH */
+		ringcmds += cp_wait_for_idle(adreno_dev, ringcmds);
+	}
+
+	/*
+	 * Add any postIB required for profiling if it is enabled and has
+	 * assigned counters
+	 */
+	if (profile_ready)
+		adreno_profile_postib_processing(adreno_dev, &flags, &ringcmds);
+
+	/*
+	 * WAIT_MEM_WRITES - needed in the stall on fault case to prevent
+	 * out of order CP operations that can result in a CACHE_FLUSH_TS
+	 * interrupt storm
+	 */
+	if (test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE,
+				&adreno_dev->ft_pf_policy))
+		*ringcmds++ = cp_packet(adreno_dev, CP_WAIT_MEM_WRITES, 0);
+
+	/*
+	 * Do a unique memory write from the GPU. This can be used in
+	 * early detection of timestamp interrupt storms to stave
+	 * off system collapse.
+	 */
+	ringcmds += cp_mem_write(adreno_dev, ringcmds,
+		MEMSTORE_ID_GPU_ADDR(device, KGSL_MEMSTORE_GLOBAL,
+			ref_wait_ts), ++_seq_cnt);
+
+	/*
+	 * end-of-pipeline timestamp.  If per-context timestamps are not
+	 * enabled, then drawctxt will be NULL or the internal command flag will
+	 * be set, and hence the RB timestamp is used in the else branch below.
+	 */
+	*ringcmds++ = cp_mem_packet(adreno_dev, CP_EVENT_WRITE, 3, 1);
+	if (drawctxt || (flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE))
+		*ringcmds++ = CACHE_FLUSH_TS | (1 << 31);
+	else
+		*ringcmds++ = CACHE_FLUSH_TS;
+
+	if (drawctxt && !(flags & KGSL_CMD_FLAGS_INTERNAL_ISSUE)) {
+		ringcmds += cp_gpuaddr(adreno_dev, ringcmds,
+			MEMSTORE_ID_GPU_ADDR(device, context_id, eoptimestamp));
+		*ringcmds++ = timestamp;
+
+		/* Write the end of pipeline timestamp to the ringbuffer too */
+		ringcmds += cp_mem_write(adreno_dev, ringcmds,
+			MEMSTORE_RB_GPU_ADDR(device, rb, eoptimestamp),
+			rb->timestamp);
+	} else {
+		ringcmds += cp_gpuaddr(adreno_dev, ringcmds,
+			MEMSTORE_RB_GPU_ADDR(device, rb, eoptimestamp));
+		*ringcmds++ = timestamp;
+	}
+
+	if (adreno_is_a3xx(adreno_dev)) {
+		/* Dummy set-constant to trigger context rollover */
+		*ringcmds++ = cp_packet(adreno_dev, CP_SET_CONSTANT, 2);
+		*ringcmds++ =
+			(0x4<<16) | (A3XX_HLSQ_CL_KERNEL_GROUP_X_REG - 0x2000);
+		*ringcmds++ = 0;
+	}
+
+	if (flags & KGSL_CMD_FLAGS_WFI)
+		ringcmds += cp_wait_for_idle(adreno_dev, ringcmds);
+
+	if (secured_ctxt)
+		ringcmds += cp_secure_mode(adreno_dev, ringcmds, 0);
+
+	if (gpudev->preemption_post_ibsubmit &&
+				adreno_is_preemption_enabled(adreno_dev))
+		ringcmds += gpudev->preemption_post_ibsubmit(adreno_dev,
+			ringcmds);
+
+	/*
+	 * If we wrote more ringbuffer commands than the space we reserved,
+	 * BUG() so this gets fixed, because it would otherwise lead to
+	 * weird errors.
+	 */
+	if ((ringcmds - start) > total_sizedwords)
+		BUG();
+	/*
+	 *  We allocated total_sizedwords of space in the RB, which is the max
+	 *  required. If we emitted fewer commands than the space reserved,
+	 *  adjust the wptr accordingly.
+	 */
+	rb->_wptr = rb->_wptr - (total_sizedwords - (ringcmds - start));
+
+	adreno_ringbuffer_submit(rb, time);
+
+	return 0;
+}
+
+int
+adreno_ringbuffer_issuecmds(struct adreno_ringbuffer *rb,
+				unsigned int flags,
+				unsigned int *cmds,
+				int sizedwords)
+{
+	flags |= KGSL_CMD_FLAGS_INTERNAL_ISSUE;
+
+	return adreno_ringbuffer_addcmds(rb, flags, cmds,
+		sizedwords, 0, NULL);
+}
+
+static void adreno_ringbuffer_set_constraint(struct kgsl_device *device,
+			struct kgsl_drawobj *drawobj)
+{
+	struct kgsl_context *context = drawobj->context;
+	/*
+	 * Check if the context has a constraint and constraint flags are
+	 * set.
+	 */
+	if (context->pwr_constraint.type &&
+		((context->flags & KGSL_CONTEXT_PWR_CONSTRAINT) ||
+			(drawobj->flags & KGSL_CONTEXT_PWR_CONSTRAINT)))
+		kgsl_pwrctrl_set_constraint(device, &context->pwr_constraint,
+						context->id);
+}
+
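+/*
+ * Emit a CP_REG_TO_MEM that copies the RBBM always-on counter into @gpuaddr;
+ * returns the number of dwords written to @cmds.
+ */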
+static inline int _get_alwayson_counter(struct adreno_device *adreno_dev,
+		unsigned int *cmds, uint64_t gpuaddr)
+{
+	unsigned int *p = cmds;
+
+	*p++ = cp_mem_packet(adreno_dev, CP_REG_TO_MEM, 2, 1);
+
+	/*
+	 * For A4xx and some A5xx, the alwayson_hi value read through the CPU
+	 * is masked. Do only 32-bit CP reads to keep the
+	 * numbers consistent.
+	 */
+	if (ADRENO_GPUREV(adreno_dev) >= 400 &&
+		ADRENO_GPUREV(adreno_dev) <= ADRENO_REV_A530)
+		*p++ = adreno_getreg(adreno_dev,
+			ADRENO_REG_RBBM_ALWAYSON_COUNTER_LO);
+	else
+		*p++ = adreno_getreg(adreno_dev,
+			ADRENO_REG_RBBM_ALWAYSON_COUNTER_LO) |
+			(1 << 30) | (2 << 18);
+	p += cp_gpuaddr(adreno_dev, p, gpuaddr);
+
+	return (unsigned int)(p - cmds);
+}
+
+/* adreno_ringbuffer_submitcmd - submit userspace IBs to the GPU */
+int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
+		struct kgsl_drawobj_cmd *cmdobj,
+		struct adreno_submit_time *time)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+	struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+	struct kgsl_memobj_node *ib;
+	unsigned int numibs = 0;
+	unsigned int *link;
+	unsigned int *cmds;
+	struct kgsl_context *context;
+	struct adreno_context *drawctxt;
+	bool use_preamble = true;
+	bool user_profiling = false;
+	bool kernel_profiling = false;
+	int flags = KGSL_CMD_FLAGS_NONE;
+	int ret;
+	struct adreno_ringbuffer *rb;
+	struct kgsl_drawobj_profiling_buffer *profile_buffer = NULL;
+	unsigned int dwords = 0;
+	struct adreno_submit_time local;
+
+	struct kgsl_mem_entry *entry = cmdobj->profiling_buf_entry;
+
+	if (entry)
+		profile_buffer = kgsl_gpuaddr_to_vaddr(&entry->memdesc,
+					cmdobj->profiling_buffer_gpuaddr);
+
+	context = drawobj->context;
+	drawctxt = ADRENO_CONTEXT(context);
+
+	/* Get the total IBs in the list */
+	list_for_each_entry(ib, &cmdobj->cmdlist, node)
+		numibs++;
+
+	rb = drawctxt->rb;
+
+	/* process any profiling results that are available into the log_buf */
+	adreno_profile_process_results(adreno_dev);
+
+	/*
+	 * If SKIP CMD flag is set for current context
+	 * a) set SKIPCMD as fault_recovery for current commandbatch
+	 * b) store context's commandbatch fault_policy in current
+	 *    commandbatch fault_policy and clear context's commandbatch
+	 *    fault_policy
+	 * c) force preamble for commandbatch
+	 */
+	if (test_bit(ADRENO_CONTEXT_SKIP_CMD, &drawctxt->base.priv) &&
+		(!test_bit(CMDOBJ_SKIP, &cmdobj->priv))) {
+
+		set_bit(KGSL_FT_SKIPCMD, &cmdobj->fault_recovery);
+		cmdobj->fault_policy = drawctxt->fault_policy;
+		set_bit(CMDOBJ_FORCE_PREAMBLE, &cmdobj->priv);
+
+		/* if context is detached print fault recovery */
+		adreno_fault_skipcmd_detached(adreno_dev, drawctxt, drawobj);
+
+		/* clear the drawctxt flags */
+		clear_bit(ADRENO_CONTEXT_SKIP_CMD, &drawctxt->base.priv);
+		drawctxt->fault_policy = 0;
+	}
+
+	/*
+	 * When preamble is enabled, the preamble buffer with state restoration
+	 * commands is stored in the first node of the IB chain.
+	 * We can skip that if a context switch hasn't occurred.
+	 */
+
+	if ((drawctxt->base.flags & KGSL_CONTEXT_PREAMBLE) &&
+		!test_bit(CMDOBJ_FORCE_PREAMBLE, &cmdobj->priv) &&
+		(rb->drawctxt_active == drawctxt))
+		use_preamble = false;
+
+	/*
+	 * In skip mode don't issue the draw IBs but keep all the other
+	 * accoutrements of a submission (including the interrupt) to keep
+	 * the accounting sane. Clear use_preamble and set numibs to 0 to just
+	 * generate the start and end markers and skip everything else.
+	 */
+	if (test_bit(CMDOBJ_SKIP, &cmdobj->priv)) {
+		use_preamble = false;
+		numibs = 0;
+	}
+
+	/*
+	 * A5xx uses 64-bit memory addresses, so PM4 commands that read from or
+	 * write to memory take 4 bytes more than on A4xx.
+	 * This function is shared between gpucores, so reserve the max size
+	 * required and adjust the number of commands before calling addcmds.
+	 * Each submission needs 7 dwords max for wrappers and other red tape.
+	 */
+	dwords = 7;
+
+	/* Each IB takes up 30 dwords in worst case */
+	dwords += (numibs * 30);
+
+	if (drawobj->flags & KGSL_DRAWOBJ_PROFILING &&
+		!adreno_is_a3xx(adreno_dev) && profile_buffer) {
+		user_profiling = true;
+		dwords += 6;
+
+		/*
+		 * REG_TO_MEM packet on A5xx needs another ordinal.
+		 * Add 2 more dwords since we do profiling before and after.
+		 */
+		if (adreno_is_a5xx(adreno_dev))
+			dwords += 2;
+
+		/*
+		 * we want to use an adreno_submit_time struct to get the
+		 * precise moment when the command is submitted to the
+		 * ringbuffer.  If an upstream caller already passed down a
+		 * pointer, piggyback on that; otherwise use a local struct.
+		 */
+
+		if (time == NULL)
+			time = &local;
+	}
+
+	if (test_bit(CMDOBJ_PROFILE, &cmdobj->priv)) {
+		kernel_profiling = true;
+		dwords += 6;
+		if (adreno_is_a5xx(adreno_dev))
+			dwords += 2;
+	}
+
+	if (gpudev->preemption_yield_enable &&
+				adreno_is_preemption_enabled(adreno_dev))
+		dwords += 8;
+
+	link = kcalloc(dwords, sizeof(unsigned int), GFP_KERNEL);
+	if (!link) {
+		ret = -ENOMEM;
+		goto done;
+	}
+
+	cmds = link;
+
+	*cmds++ = cp_packet(adreno_dev, CP_NOP, 1);
+	*cmds++ = KGSL_START_OF_IB_IDENTIFIER;
+
+	if (kernel_profiling) {
+		cmds += _get_alwayson_counter(adreno_dev, cmds,
+			adreno_dev->profile_buffer.gpuaddr +
+			ADRENO_DRAWOBJ_PROFILE_OFFSET(cmdobj->profile_index,
+				started));
+	}
+
+	/*
+	 * Add cmds to read the GPU ticks at the start of command obj and
+	 * write it into the appropriate command obj profiling buffer offset
+	 */
+	if (user_profiling) {
+		cmds += _get_alwayson_counter(adreno_dev, cmds,
+			cmdobj->profiling_buffer_gpuaddr +
+			offsetof(struct kgsl_drawobj_profiling_buffer,
+			gpu_ticks_submitted));
+	}
+
+	if (numibs) {
+		list_for_each_entry(ib, &cmdobj->cmdlist, node) {
+			/*
+			 * Skip 0 sized IBs - these are presumed to have been
+			 * removed from consideration by the FT policy
+			 */
+			if (ib->priv & MEMOBJ_SKIP ||
+				(ib->priv & MEMOBJ_PREAMBLE &&
+				use_preamble == false))
+				*cmds++ = cp_mem_packet(adreno_dev, CP_NOP,
+						3, 1);
+
+			*cmds++ = cp_mem_packet(adreno_dev,
+					CP_INDIRECT_BUFFER_PFE, 2, 1);
+			cmds += cp_gpuaddr(adreno_dev, cmds, ib->gpuaddr);
+			*cmds++ = (unsigned int) ib->size >> 2;
+			/* preamble is required only for the first command */
+			use_preamble = false;
+		}
+	}
+
+	if (gpudev->preemption_yield_enable &&
+				adreno_is_preemption_enabled(adreno_dev))
+		cmds += gpudev->preemption_yield_enable(cmds);
+
+	if (kernel_profiling) {
+		cmds += _get_alwayson_counter(adreno_dev, cmds,
+			adreno_dev->profile_buffer.gpuaddr +
+			ADRENO_DRAWOBJ_PROFILE_OFFSET(cmdobj->profile_index,
+				retired));
+	}
+
+	/*
+	 * Add cmds to read the GPU ticks at the end of command obj and
+	 * write it into the appropriate command obj profiling buffer offset
+	 */
+	if (user_profiling) {
+		cmds += _get_alwayson_counter(adreno_dev, cmds,
+			cmdobj->profiling_buffer_gpuaddr +
+			offsetof(struct kgsl_drawobj_profiling_buffer,
+			gpu_ticks_retired));
+	}
+
+	*cmds++ = cp_packet(adreno_dev, CP_NOP, 1);
+	*cmds++ = KGSL_END_OF_IB_IDENTIFIER;
+
+	/* Context switch commands should *always* be on the GPU */
+	ret = adreno_drawctxt_switch(adreno_dev, rb, drawctxt,
+		ADRENO_CONTEXT_SWITCH_FORCE_GPU);
+
+	/*
+	 * In the unlikely event of an error in the drawctxt switch,
+	 * treat it like a hang
+	 */
+	if (ret) {
+		/*
+		 * It is "normal" to get a -ENOSPC or a -ENOENT. Don't log it,
+		 * the upper layers know how to handle it
+		 */
+		if (ret != -ENOSPC && ret != -ENOENT)
+			KGSL_DRV_ERR(device,
+				"Unable to switch draw context: %d\n", ret);
+		goto done;
+	}
+
+	if (test_bit(CMDOBJ_WFI, &cmdobj->priv))
+		flags = KGSL_CMD_FLAGS_WFI;
+
+	/*
+	 * For some targets, we need to execute a dummy shader operation after a
+	 * power collapse
+	 */
+
+	if (test_and_clear_bit(ADRENO_DEVICE_PWRON, &adreno_dev->priv) &&
+		test_bit(ADRENO_DEVICE_PWRON_FIXUP, &adreno_dev->priv))
+		flags |= KGSL_CMD_FLAGS_PWRON_FIXUP;
+
+	/* Set the constraints before adding to ringbuffer */
+	adreno_ringbuffer_set_constraint(device, drawobj);
+
+	ret = adreno_ringbuffer_addcmds(rb, flags,
+					&link[0], (cmds - link),
+					drawobj->timestamp, time);
+
+	if (!ret) {
+		cmdobj->global_ts = drawctxt->internal_timestamp;
+
+		/* Put the timevalues in the profiling buffer */
+		if (user_profiling) {
+			/*
+			 * Return kernel clock time to the client
+			 * if requested
+			 */
+			if (drawobj->flags & KGSL_DRAWOBJ_PROFILING_KTIME) {
+				uint64_t secs = time->ktime;
+
+				profile_buffer->wall_clock_ns =
+					do_div(secs, NSEC_PER_SEC);
+				profile_buffer->wall_clock_s = secs;
+			} else {
+				profile_buffer->wall_clock_s =
+					time->utime.tv_sec;
+				profile_buffer->wall_clock_ns =
+					time->utime.tv_nsec;
+			}
+			profile_buffer->gpu_ticks_queued = time->ticks;
+		}
+	}
+
+done:
+	/* Corresponding unmap to the memdesc map of profile_buffer */
+	if (entry)
+		kgsl_memdesc_unmap(&entry->memdesc);
+
+
+	trace_kgsl_issueibcmds(device, context->id, numibs, drawobj->timestamp,
+			drawobj->flags, ret, drawctxt->type);
+
+	kfree(link);
+	return ret;
+}
+
+/**
+ * adreno_ringbuffer_wait_callback() - Callback function for event registered
+ * on a ringbuffer timestamp
+ * @device: Device for which the callback is valid
+ * @group: The event group the timestamp belongs to
+ * @priv: The private parameter of the event
+ * @result: Result of the event trigger
+ */
+static void adreno_ringbuffer_wait_callback(struct kgsl_device *device,
+		struct kgsl_event_group *group,
+		void *priv, int result)
+{
+	struct adreno_ringbuffer *rb = group->priv;
+
+	wake_up_all(&rb->ts_expire_waitq);
+}
+
+/* check if timestamp is greater than the current rb timestamp */
+static inline int adreno_ringbuffer_check_timestamp(
+			struct adreno_ringbuffer *rb,
+			unsigned int timestamp, int type)
+{
+	struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
+	unsigned int ts;
+
+	adreno_rb_readtimestamp(adreno_dev, rb, type, &ts);
+	return (timestamp_cmp(ts, timestamp) >= 0);
+}
+
+
+/**
+ * adreno_ringbuffer_waittimestamp() - Wait for a RB timestamp
+ * @rb: The ringbuffer to wait on
+ * @timestamp: The timestamp to wait for
+ * @msecs: The wait timeout period
+ */
+int adreno_ringbuffer_waittimestamp(struct adreno_ringbuffer *rb,
+					unsigned int timestamp,
+					unsigned int msecs)
+{
+	struct adreno_device *adreno_dev = ADRENO_RB_DEVICE(rb);
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+	unsigned long wait_time;
+
+	/* check immediately if timeout is 0 */
+	if (msecs == 0)
+		return adreno_ringbuffer_check_timestamp(rb,
+			timestamp, KGSL_TIMESTAMP_RETIRED) ? 0 : -EBUSY;
+
+	ret = kgsl_add_event(device, &rb->events, timestamp,
+		adreno_ringbuffer_wait_callback, NULL);
+	if (ret)
+		return ret;
+
+	mutex_unlock(&device->mutex);
+
+	wait_time = msecs_to_jiffies(msecs);
+	if (wait_event_timeout(rb->ts_expire_waitq,
+		!kgsl_event_pending(device, &rb->events, timestamp,
+				adreno_ringbuffer_wait_callback, NULL),
+		wait_time) == 0)
+		ret  = -ETIMEDOUT;
+
+	mutex_lock(&device->mutex);
+	/*
+	 * after wake up make sure that expected timestamp has retired
+	 * because the wakeup could have happened due to a cancel event
+	 */
+	if (!ret && !adreno_ringbuffer_check_timestamp(rb,
+		timestamp, KGSL_TIMESTAMP_RETIRED)) {
+		ret = -EAGAIN;
+	}
+
+	return ret;
+}
diff --git a/drivers/gpu/msm/adreno_ringbuffer.h b/drivers/gpu/msm/adreno_ringbuffer.h
new file mode 100644
index 0000000..63374af
--- /dev/null
+++ b/drivers/gpu/msm/adreno_ringbuffer.h
@@ -0,0 +1,212 @@
+/* Copyright (c) 2002,2007-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef __ADRENO_RINGBUFFER_H
+#define __ADRENO_RINGBUFFER_H
+
+#include "kgsl_iommu.h"
+#include "adreno_iommu.h"
+#include "adreno_dispatch.h"
+
+/* Given a ringbuffer, return the adreno device that owns it */
+
+#define _RB_OFFSET(_id) (offsetof(struct adreno_device, ringbuffers) + \
+		((_id) * sizeof(struct adreno_ringbuffer)))
+
+#define ADRENO_RB_DEVICE(_rb) \
+	((struct adreno_device *) (((void *) (_rb)) - _RB_OFFSET((_rb)->id)))
+
+/* Adreno ringbuffer size in bytes */
+#define KGSL_RB_SIZE (32 * 1024)
+
+/*
+ * A handy macro to convert the RB size to dwords since most ringbuffer
+ * operations happen in dword increments
+ */
+#define KGSL_RB_DWORDS (KGSL_RB_SIZE >> 2)
+
+struct kgsl_device;
+struct kgsl_device_private;
+
+/**
+ * struct adreno_submit_time - utility structure to store the wall clock / GPU
+ * ticks at command submit time
+ * @ticks: GPU ticks at submit time (from the 19.2Mhz timer)
+ * @ktime: local clock time (in nanoseconds)
+ * @utime: Wall clock time
+ */
+struct adreno_submit_time {
+	uint64_t ticks;
+	u64 ktime;
+	struct timespec utime;
+};
+
+/**
+ * struct adreno_ringbuffer_pagetable_info - Contains fields used during a
+ * pagetable switch.
+ * @current_global_ptname: The current pagetable id being used by the GPU.
+ * Only the ringbuffers[0] current_global_ptname is used to keep track of
+ * the current pagetable id
+ * @current_rb_ptname: The current pagetable active on the given RB
+ * @incoming_ptname: Contains the incoming pagetable we are switching to. After
+ * switching of pagetable this value equals current_rb_ptname.
+ * @switch_pt_enable: Flag used during pagetable switch to check if pt
+ * switch can be skipped
+ * @ttbr0: value to program into TTBR0 during pagetable switch.
+ * @contextidr: value to program into CONTEXTIDR during pagetable switch.
+ */
+struct adreno_ringbuffer_pagetable_info {
+	int current_global_ptname;
+	int current_rb_ptname;
+	int incoming_ptname;
+	int switch_pt_enable;
+	uint64_t ttbr0;
+	unsigned int contextidr;
+};
+
+#define PT_INFO_OFFSET(_field) \
+	offsetof(struct adreno_ringbuffer_pagetable_info, _field)
+
+/**
+ * struct adreno_ringbuffer - Definition for an adreno ringbuffer object
+ * @flags: Internal control flags for the ringbuffer
+ * @buffer_desc: Pointer to the ringbuffer memory descriptor
+ * @_wptr: The next value of wptr to be written to the hardware on submit
+ * @wptr: Local copy of the wptr offset last written to hardware
+ * @last_wptr: offset of the last wptr that was written to CFF
+ * @rb_ctx: The context that represents a ringbuffer
+ * @id: Priority level of the ringbuffer, also used as an ID
+ * @fault_detect_ts: The last retired global timestamp read during fault detect
+ * @timestamp: The RB's global timestamp
+ * @events: A kgsl_event_group for this context - contains the list of GPU
+ * events
+ * @drawctxt_active: The last draw context active on this ringbuffer
+ * @preemption_desc: The memory descriptor containing
+ * preemption info written/read by CP
+ * @pagetable_desc: Memory to hold information about the pagetables being used
+ * and the commands to switch pagetable on the RB
+ * @dispatch_q: The dispatcher side queue for this ringbuffer
+ * @ts_expire_waitq: Wait queue to wait for rb timestamp to expire
+ * @wptr_preempt_end: Used during preemption to check that preemption occurred
+ * at the right rptr
+ * @gpr11: The gpr11 value of this RB
+ * @preempted_midway: Indicates that the RB was preempted before rptr = wptr
+ * @sched_timer: Timer that tracks how long RB has been waiting to be scheduled
+ * or how long it has been scheduled for after preempting in
+ * @starve_timer_state: Indicates the state of the wait.
+ * @preempt_lock: Lock to protect the wptr pointer while it is being updated
+ */
+struct adreno_ringbuffer {
+	uint32_t flags;
+	struct kgsl_memdesc buffer_desc;
+	unsigned int _wptr;
+	unsigned int wptr;
+	unsigned int last_wptr;
+	int id;
+	unsigned int fault_detect_ts;
+	unsigned int timestamp;
+	struct kgsl_event_group events;
+	struct adreno_context *drawctxt_active;
+	struct kgsl_memdesc preemption_desc;
+	struct kgsl_memdesc pagetable_desc;
+	struct adreno_dispatcher_drawqueue dispatch_q;
+	wait_queue_head_t ts_expire_waitq;
+	unsigned int wptr_preempt_end;
+	unsigned int gpr11;
+	int preempted_midway;
+	unsigned long sched_timer;
+	enum adreno_dispatcher_starve_timer_states starve_timer_state;
+	spinlock_t preempt_lock;
+};
+
+/* Returns the current ringbuffer */
+#define ADRENO_CURRENT_RINGBUFFER(a)	((a)->cur_rb)
+
+int cp_secure_mode(struct adreno_device *adreno_dev, uint *cmds, int set);
+
+int adreno_ringbuffer_issueibcmds(struct kgsl_device_private *dev_priv,
+				struct kgsl_context *context,
+				struct kgsl_drawobj *drawobj,
+				uint32_t *timestamp);
+
+int adreno_ringbuffer_submitcmd(struct adreno_device *adreno_dev,
+		struct kgsl_drawobj_cmd *cmdobj,
+		struct adreno_submit_time *time);
+
+int adreno_ringbuffer_probe(struct adreno_device *adreno_dev, bool nopreempt);
+
+int adreno_ringbuffer_start(struct adreno_device *adreno_dev,
+		unsigned int start_type);
+
+void adreno_ringbuffer_stop(struct adreno_device *adreno_dev);
+
+void adreno_ringbuffer_close(struct adreno_device *adreno_dev);
+
+int adreno_ringbuffer_issuecmds(struct adreno_ringbuffer *rb,
+					unsigned int flags,
+					unsigned int *cmdaddr,
+					int sizedwords);
+
+void adreno_ringbuffer_submit(struct adreno_ringbuffer *rb,
+		struct adreno_submit_time *time);
+
+int adreno_ringbuffer_submit_spin(struct adreno_ringbuffer *rb,
+		struct adreno_submit_time *time, unsigned int timeout);
+
+void kgsl_cp_intrcallback(struct kgsl_device *device);
+
+unsigned int *adreno_ringbuffer_allocspace(struct adreno_ringbuffer *rb,
+						unsigned int numcmds);
+
+void adreno_ringbuffer_read_pfp_ucode(struct kgsl_device *device);
+
+void adreno_ringbuffer_read_pm4_ucode(struct kgsl_device *device);
+
+int adreno_ringbuffer_waittimestamp(struct adreno_ringbuffer *rb,
+					unsigned int timestamp,
+					unsigned int msecs);
+
+int adreno_rb_readtimestamp(struct adreno_device *adreno_dev,
+	void *priv, enum kgsl_timestamp_type type,
+	unsigned int *timestamp);
+
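+/* Return the number of dwords queued between rptr and wptr, handling wrap */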
+static inline int adreno_ringbuffer_count(struct adreno_ringbuffer *rb,
+	unsigned int rptr)
+{
+	if (rb->wptr >= rptr)
+		return rb->wptr - rptr;
+	return rb->wptr + KGSL_RB_DWORDS - rptr;
+}
+
+/* Increment a value by 4 bytes with wrap-around based on size */
+static inline unsigned int adreno_ringbuffer_inc_wrapped(unsigned int val,
+							unsigned int size)
+{
+	return (val + sizeof(unsigned int)) % size;
+}
+
+/* Decrement a value by 4 bytes with wrap-around based on size */
+static inline unsigned int adreno_ringbuffer_dec_wrapped(unsigned int val,
+							unsigned int size)
+{
+	return (val + size - sizeof(unsigned int)) % size;
+}
+
+static inline int adreno_ringbuffer_set_pt_ctx(struct adreno_ringbuffer *rb,
+		struct kgsl_pagetable *pt, struct adreno_context *context,
+		unsigned long flags)
+{
+	return adreno_iommu_set_pt_ctx(rb, pt, context, flags);
+}
+
+#endif  /* __ADRENO_RINGBUFFER_H */
diff --git a/drivers/gpu/msm/adreno_snapshot.c b/drivers/gpu/msm/adreno_snapshot.c
new file mode 100644
index 0000000..e79e5e3
--- /dev/null
+++ b/drivers/gpu/msm/adreno_snapshot.c
@@ -0,0 +1,1224 @@
+/* Copyright (c) 2012-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include "kgsl.h"
+#include "kgsl_sharedmem.h"
+#include "kgsl_snapshot.h"
+
+#include "adreno.h"
+#include "adreno_pm4types.h"
+#include "a3xx_reg.h"
+#include "adreno_cp_parser.h"
+#include "adreno_snapshot.h"
+#include "adreno_a5xx.h"
+
+#define VPC_MEMORY_BANKS 4
+
+/* Maintain a list of the objects we see during parsing */
+
+#define SNAPSHOT_OBJ_BUFSIZE 64
+
+#define SNAPSHOT_OBJ_TYPE_IB 0
+
+/* Used to print error message if an IB has too many objects in it */
+static int ib_max_objs;
+
+struct snapshot_rb_params {
+	struct kgsl_snapshot *snapshot;
+	struct adreno_ringbuffer *rb;
+};
+
+/* Keep track of how many bytes are frozen after a snapshot and tell the user */
+static size_t snapshot_frozen_objsize;
+
+static struct kgsl_snapshot_object objbuf[SNAPSHOT_OBJ_BUFSIZE];
+
+/* Pointer to the next open entry in the object list */
+static unsigned int objbufptr;
+
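+/*
+ * A context switch is marked in the ringbuffer by a CP_NOP packet followed
+ * by the KGSL_CONTEXT_TO_MEM_IDENTIFIER magic value
+ */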
+static inline int adreno_rb_ctxtswitch(struct adreno_device *adreno_dev,
+				   unsigned int *cmd)
+{
+	return cmd[0] == cp_packet(adreno_dev, CP_NOP, 1) &&
+		cmd[1] == KGSL_CONTEXT_TO_MEM_IDENTIFIER;
+}
+
+/* Push a new buffer object onto the list */
+static void push_object(int type,
+	struct kgsl_process_private *process,
+	uint64_t gpuaddr, uint64_t dwords)
+{
+	int index;
+	struct kgsl_mem_entry *entry;
+
+	if (process == NULL)
+		return;
+
+	/*
+	 * Sometimes IBs can be reused in the same dump.  Because we parse from
+	 * oldest to newest, if we come across an IB that has already been used,
+	 * assume that it has been reused and update the list with the newest
+	 * size.
+	 */
+
+	for (index = 0; index < objbufptr; index++) {
+		if (objbuf[index].gpuaddr == gpuaddr &&
+			objbuf[index].entry->priv == process) {
+
+			objbuf[index].size = max_t(uint64_t,
+						objbuf[index].size,
+						dwords << 2);
+			return;
+		}
+	}
+
+	if (objbufptr == SNAPSHOT_OBJ_BUFSIZE) {
+		KGSL_CORE_ERR("snapshot: too many snapshot objects\n");
+		return;
+	}
+
+	entry = kgsl_sharedmem_find(process, gpuaddr);
+	if (entry == NULL) {
+		KGSL_CORE_ERR("snapshot: Can't find entry for 0x%016llX\n",
+			gpuaddr);
+		return;
+	}
+
+	if (!kgsl_gpuaddr_in_memdesc(&entry->memdesc, gpuaddr, dwords << 2)) {
+		KGSL_CORE_ERR("snapshot: Mem entry 0x%016llX is too small\n",
+			gpuaddr);
+		kgsl_mem_entry_put(entry);
+		return;
+	}
+
+	/* Put it on the list of things to parse */
+	objbuf[objbufptr].type = type;
+	objbuf[objbufptr].gpuaddr = gpuaddr;
+	objbuf[objbufptr].size = dwords << 2;
+	objbuf[objbufptr++].entry = entry;
+}
+
+/*
+ * Returns the index of the specified object if it is already on the list of
+ * buffers to be dumped, or -ENOENT if it is not
+ */
+
+static int find_object(int type, uint64_t gpuaddr,
+		struct kgsl_process_private *process)
+{
+	int index;
+
+	for (index = 0; index < objbufptr; index++) {
+		if (objbuf[index].gpuaddr == gpuaddr &&
+			objbuf[index].entry->priv == process)
+			return index;
+	}
+	return -ENOENT;
+}
+
+/*
+ * snapshot_freeze_obj_list() - Take a list of ib objects and freeze their
+ * memory for snapshot
+ * @snapshot: The snapshot data.
+ * @process: The process to which the IB belongs
+ * @ib_obj_list: List of the IB objects
+ * @ib2base: IB2 base address at time of the fault
+ *
+ * Returns 0 on success else error code
+ */
+static int snapshot_freeze_obj_list(struct kgsl_snapshot *snapshot,
+		struct kgsl_process_private *process,
+		struct adreno_ib_object_list *ib_obj_list,
+		uint64_t ib2base)
+{
+	int ret = 0;
+	struct adreno_ib_object *ib_objs;
+	int i;
+
+	for (i = 0; i < ib_obj_list->num_objs; i++) {
+		int temp_ret;
+		int index;
+		int freeze = 1;
+
+		ib_objs = &(ib_obj_list->obj_list[i]);
+		/* Make sure this object is not going to be saved statically */
+		for (index = 0; index < objbufptr; index++) {
+			if ((objbuf[index].gpuaddr <= ib_objs->gpuaddr) &&
+				((objbuf[index].gpuaddr +
+				(objbuf[index].size)) >=
+				(ib_objs->gpuaddr + ib_objs->size)) &&
+				(objbuf[index].entry->priv == process)) {
+				freeze = 0;
+				break;
+			}
+		}
+
+		if (freeze) {
+			/* Save current IB2 statically */
+			if (ib2base == ib_objs->gpuaddr) {
+				push_object(SNAPSHOT_OBJ_TYPE_IB,
+				process, ib_objs->gpuaddr, ib_objs->size >> 2);
+			} else {
+				temp_ret = kgsl_snapshot_get_object(snapshot,
+					process, ib_objs->gpuaddr,
+					ib_objs->size,
+					ib_objs->snapshot_obj_type);
+				if (temp_ret < 0) {
+					if (ret >= 0)
+						ret = temp_ret;
+				} else {
+					snapshot_frozen_objsize += temp_ret;
+				}
+			}
+		}
+	}
+	return ret;
+}
+
+/*
+ * We want to store the last executed IB1 and IB2 in the static region to ensure
+ * that we get at least some information out of the snapshot even if we can't
+ * access the dynamic data from the sysfs file.  Push all other IBs on the
+ * dynamic list
+ */
+static inline void parse_ib(struct kgsl_device *device,
+		struct kgsl_snapshot *snapshot,
+		struct kgsl_process_private *process,
+		uint64_t gpuaddr, uint64_t dwords)
+{
+	struct adreno_ib_object_list *ib_obj_list;
+
+	/*
+	 * Check the IB address - if it is the last executed IB1 then push it
+	 * into the static blob, otherwise put it in the dynamic list
+	 */
+	if (gpuaddr == snapshot->ib1base) {
+		push_object(SNAPSHOT_OBJ_TYPE_IB, process,
+			gpuaddr, dwords);
+		return;
+	}
+
+	if (kgsl_snapshot_have_object(snapshot, process,
+					gpuaddr, dwords << 2))
+		return;
+
+	if (-E2BIG == adreno_ib_create_object_list(device, process,
+				gpuaddr, dwords, &ib_obj_list))
+		ib_max_objs = 1;
+
+	if (ib_obj_list)
+		kgsl_snapshot_add_ib_obj_list(snapshot, ib_obj_list);
+
+}
+
+static inline bool iommu_is_setstate_addr(struct kgsl_device *device,
+		uint64_t gpuaddr, uint64_t size)
+{
+	struct kgsl_iommu *iommu = KGSL_IOMMU_PRIV(device);
+
+	if (kgsl_mmu_get_mmutype(device) != KGSL_MMU_TYPE_IOMMU)
+		return false;
+
+	return kgsl_gpuaddr_in_memdesc(&iommu->setstate, gpuaddr,
+			size);
+}
+
+static void dump_all_ibs(struct kgsl_device *device,
+			struct adreno_ringbuffer *rb,
+			struct kgsl_snapshot *snapshot)
+{
+	int index = 0;
+	unsigned int *rbptr;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	rbptr = rb->buffer_desc.hostptr;
+
+	for (index = 0; index < KGSL_RB_DWORDS;) {
+
+		if (adreno_cmd_is_ib(adreno_dev, rbptr[index])) {
+			uint64_t ibaddr;
+			uint64_t ibsize;
+
+			if (ADRENO_LEGACY_PM4(adreno_dev)) {
+				ibaddr = rbptr[index + 1];
+				ibsize = rbptr[index + 2];
+				index += 3;
+			} else {
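+				/*
+				 * 64-bit IB packet: the low dword of the
+				 * address comes first, then the high dword,
+				 * then the size
+				 */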
+				ibaddr = rbptr[index + 2];
+				ibaddr = ibaddr << 32 | rbptr[index + 1];
+				ibsize = rbptr[index + 3];
+				index += 4;
+			}
+
+			/* Don't parse known global IBs */
+			if (iommu_is_setstate_addr(device, ibaddr, ibsize))
+				continue;
+
+			if (kgsl_gpuaddr_in_memdesc(&adreno_dev->pwron_fixup,
+				ibaddr, ibsize))
+				continue;
+
+			parse_ib(device, snapshot, snapshot->process, ibaddr,
+				ibsize);
+		} else {
+			index++;
+		}
+	}
+}
+
+/**
+ * snapshot_rb_ibs() - Dump RB data and capture the IBs in the RB as well
+ * @device: Pointer to a KGSL device
+ * @rb: The RB to dump
+ * @data: Pointer to memory where the RB data is to be dumped
+ * @snapshot: Pointer to information about the current snapshot being taken
+ */
+static void snapshot_rb_ibs(struct kgsl_device *device,
+		struct adreno_ringbuffer *rb,
+		struct kgsl_snapshot *snapshot)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	unsigned int rptr, *rbptr;
+	int index, i;
+	int parse_ibs = 0, ib_parse_start;
+
+	/* Get the current read pointers for the RB */
+	adreno_readreg(adreno_dev, ADRENO_REG_CP_RB_RPTR, &rptr);
+
+	/*
+	 * Figure out the window of ringbuffer data to dump.  First we need to
+	 * find where the last processed IB was submitted.  Start walking back
+	 * from the rptr
+	 */
+
+	index = rptr;
+	rbptr = rb->buffer_desc.hostptr;
+
+	do {
+		index--;
+
+		if (index < 0) {
+			if (ADRENO_LEGACY_PM4(adreno_dev))
+				index = KGSL_RB_DWORDS - 3;
+			else
+				index = KGSL_RB_DWORDS - 4;
+
+			/* We wrapped without finding what we wanted */
+			if (index < rb->wptr) {
+				index = rb->wptr;
+				break;
+			}
+		}
+
+		if (adreno_cmd_is_ib(adreno_dev, rbptr[index])) {
+			if (ADRENO_LEGACY_PM4(adreno_dev)) {
+				if (rbptr[index + 1] == snapshot->ib1base)
+					break;
+			} else {
+				uint64_t ibaddr;
+
+				ibaddr = rbptr[index + 2];
+				ibaddr = ibaddr << 32 | rbptr[index + 1];
+				if (ibaddr == snapshot->ib1base)
+					break;
+			}
+		}
+	} while (index != rb->wptr);
+
+	/*
+	 * If the ib1 was not found, for example, if ib1base was restored
+	 * incorrectly after preemption, then simply dump the entire
+	 * ringbuffer along with all the IBs in the ringbuffer.
+	 */
+
+	if (index == rb->wptr) {
+		dump_all_ibs(device, rb, snapshot);
+		return;
+	}
+
+	/*
+	 * index points at the last submitted IB. We can only trust that the
+	 * memory between the context switch and the hanging IB is valid, so
+	 * the next step is to find the context switch before the submission
+	 */
+
+	while (index != rb->wptr) {
+		index--;
+
+		if (index < 0) {
+			index = KGSL_RB_DWORDS - 2;
+
+			/*
+			 * Wrapped without finding the context switch. This is
+			 * harmless - we should still have enough data to dump a
+			 * valid state
+			 */
+
+			if (index < rb->wptr) {
+				index = rb->wptr;
+				break;
+			}
+		}
+
+		/* Break if the current packet is a context switch identifier */
+		if ((rbptr[index] == cp_packet(adreno_dev, CP_NOP, 1)) &&
+			(rbptr[index + 1] == KGSL_CONTEXT_TO_MEM_IDENTIFIER))
+			break;
+	}
+
+	/*
+	 * Index represents the start of the window of interest.  We will try
+	 * to dump all buffers between here and the rptr
+	 */
+
+	ib_parse_start = index;
+
+	/*
+	 * Loop through the RB, looking for indirect buffers and MMU pagetable
+	 * changes
+	 */
+
+	index = rb->wptr;
+	for (i = 0; i < KGSL_RB_DWORDS; i++) {
+		/*
+		 * Only parse IBs between the start and the rptr or the next
+		 * context switch, whichever comes first
+		 */
+
+		if (parse_ibs == 0 && index == ib_parse_start)
+			parse_ibs = 1;
+		else if (index == rptr || adreno_rb_ctxtswitch(adreno_dev,
+							&rbptr[index]))
+			parse_ibs = 0;
+
+		if (parse_ibs && adreno_cmd_is_ib(adreno_dev, rbptr[index])) {
+			uint64_t ibaddr;
+			uint64_t ibsize;
+
+			if (ADRENO_LEGACY_PM4(adreno_dev)) {
+				ibaddr = rbptr[index + 1];
+				ibsize = rbptr[index + 2];
+			} else {
+				ibaddr = rbptr[index + 2];
+				ibaddr = ibaddr << 32 | rbptr[index + 1];
+				ibsize = rbptr[index + 3];
+			}
+
+			/* Don't parse known global IBs */
+			if (iommu_is_setstate_addr(device, ibaddr, ibsize))
+				continue;
+
+			if (kgsl_gpuaddr_in_memdesc(&adreno_dev->pwron_fixup,
+				ibaddr, ibsize))
+				continue;
+
+			parse_ib(device, snapshot, snapshot->process,
+				ibaddr, ibsize);
+		}
+
+		index = (index + 1) % KGSL_RB_DWORDS;
+	}
+
+}
+
+/* Snapshot the ringbuffer memory */
+static size_t snapshot_rb(struct kgsl_device *device, u8 *buf,
+	size_t remain, void *priv)
+{
+	struct kgsl_snapshot_rb_v2 *header = (struct kgsl_snapshot_rb_v2 *)buf;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct snapshot_rb_params *snap_rb_params = priv;
+	struct kgsl_snapshot *snapshot = snap_rb_params->snapshot;
+	struct adreno_ringbuffer *rb = snap_rb_params->rb;
+
+	/*
+	 * Dump the entire ringbuffer - the parser can choose how much of it to
+	 * process
+	 */
+
+	if (remain < KGSL_RB_SIZE + sizeof(*header)) {
+		KGSL_CORE_ERR("snapshot: Not enough memory for the rb section");
+		return 0;
+	}
+
+	/* Write the sub-header for the section */
+	header->start = 0;
+	header->end = KGSL_RB_DWORDS;
+	header->wptr = rb->wptr;
+	header->rptr = adreno_get_rptr(rb);
+	header->rbsize = KGSL_RB_DWORDS;
+	header->count = KGSL_RB_DWORDS;
+	adreno_rb_readtimestamp(adreno_dev, rb, KGSL_TIMESTAMP_QUEUED,
+					&header->timestamp_queued);
+	adreno_rb_readtimestamp(adreno_dev, rb, KGSL_TIMESTAMP_RETIRED,
+					&header->timestamp_retired);
+	header->gpuaddr = rb->buffer_desc.gpuaddr;
+	header->id = rb->id;
+
+	if (rb == adreno_dev->cur_rb)
+		snapshot_rb_ibs(device, rb, snapshot);
+
+	/* Just copy the ringbuffer, there are no active IBs */
+	memcpy(data, rb->buffer_desc.hostptr, KGSL_RB_SIZE);
+
+	/* Return the size of the section */
+	return KGSL_RB_SIZE + sizeof(*header);
+}
+
+static int _count_mem_entries(int id, void *ptr, void *data)
+{
+	int *count = data;
+	*count = *count + 1;
+	return 0;
+}
+
+struct mem_entry {
+	uint64_t gpuaddr;
+	uint64_t size;
+	unsigned int type;
+} __packed;
+
+static int _save_mem_entries(int id, void *ptr, void *data)
+{
+	struct kgsl_mem_entry *entry = ptr;
+	struct mem_entry *m = (struct mem_entry *) data;
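+	/* mem entry ids are assumed to start at 1, so id - 1 is the array slot */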
+	unsigned int index = id - 1;
+
+	m[index].gpuaddr = entry->memdesc.gpuaddr;
+	m[index].size = entry->memdesc.size;
+	m[index].type = kgsl_memdesc_get_memtype(&entry->memdesc);
+
+	return 0;
+}
+
+static size_t snapshot_capture_mem_list(struct kgsl_device *device,
+		u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_mem_list_v2 *header =
+		(struct kgsl_snapshot_mem_list_v2 *)buf;
+	int num_mem = 0;
+	int ret = 0;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	struct kgsl_process_private *process = priv;
+
+	/* we need a process to search! */
+	if (process == NULL)
+		return 0;
+
+	spin_lock(&process->mem_lock);
+
+	/* We need to know the number of memory objects that the process has */
+	idr_for_each(&process->mem_idr, _count_mem_entries, &num_mem);
+
+	if (num_mem == 0)
+		goto out;
+
+	if (remain < ((num_mem * sizeof(struct mem_entry)) + sizeof(*header))) {
+		KGSL_CORE_ERR("snapshot: Not enough memory for the mem list");
+		goto out;
+	}
+
+	header->num_entries = num_mem;
+	header->ptbase = kgsl_mmu_pagetable_get_ttbr0(process->pagetable);
+
+	/*
+	 * Walk through the memory list and store the
+	 * tuples (gpuaddr, size, memtype) in the snapshot
+	 */
+	idr_for_each(&process->mem_idr, _save_mem_entries, data);
+
+	ret = sizeof(*header) + (num_mem * sizeof(struct mem_entry));
+out:
+	spin_unlock(&process->mem_lock);
+	return ret;
+}
+
+struct snapshot_ib_meta {
+	struct kgsl_snapshot *snapshot;
+	struct kgsl_snapshot_object *obj;
+	uint64_t ib1base;
+	uint64_t ib1size;
+	uint64_t ib2base;
+	uint64_t ib2size;
+};
+
+void kgsl_snapshot_add_active_ib_obj_list(struct kgsl_device *device,
+		struct kgsl_snapshot *snapshot)
+{
+	struct adreno_ib_object_list *ib_obj_list;
+	int index = -ENOENT;
+
+	if (!snapshot->ib1dumped)
+		index = find_object(SNAPSHOT_OBJ_TYPE_IB, snapshot->ib1base,
+				snapshot->process);
+
+	/* only do this for IB1 because the IB2s are part of IB1 objects */
+	if ((index != -ENOENT) &&
+			(snapshot->ib1base == objbuf[index].gpuaddr)) {
+		if (-E2BIG == adreno_ib_create_object_list(device,
+					objbuf[index].entry->priv,
+					objbuf[index].gpuaddr,
+					objbuf[index].size >> 2,
+					&ib_obj_list))
+			ib_max_objs = 1;
+		if (ib_obj_list) {
+			/* freeze the IB objects in the IB */
+			snapshot_freeze_obj_list(snapshot,
+					objbuf[index].entry->priv,
+					ib_obj_list, snapshot->ib2base);
+			adreno_ib_destroy_obj_list(ib_obj_list);
+		}
+	} else {
+		/* Get the IB2 index from parsed object */
+		index = find_object(SNAPSHOT_OBJ_TYPE_IB, snapshot->ib2base,
+				snapshot->process);
+
+		if (index != -ENOENT)
+			parse_ib(device, snapshot, snapshot->process,
+				snapshot->ib2base, objbuf[index].size >> 2);
+	}
+}
+
+/*
+ * active_ib_is_parsed() - Checks if active ib is already parsed
+ * @gpuaddr: Active IB base address at the time of fault
+ * @size: Active IB size
+ * @process: The process to which the IB belongs
+ *
+ * Returns true if the active IB has already been parsed, else false
+ */
+static bool active_ib_is_parsed(uint64_t gpuaddr, uint64_t size,
+		struct kgsl_process_private *process)
+{
+	int  index;
+	/* go through the static list to check whether gpuaddr is already in it */
+	for (index = 0; index < objbufptr; index++) {
+		if ((objbuf[index].gpuaddr <= gpuaddr) &&
+				((objbuf[index].gpuaddr +
+				  (objbuf[index].size)) >=
+				 (gpuaddr + size)) &&
+				(objbuf[index].entry->priv == process))
+			return true;
+	}
+	return false;
+}
+
+/* Snapshot the memory for an indirect buffer */
+static size_t snapshot_ib(struct kgsl_device *device, u8 *buf,
+	size_t remain, void *priv)
+{
+	struct kgsl_snapshot_ib_v2 *header = (struct kgsl_snapshot_ib_v2 *)buf;
+	struct snapshot_ib_meta *meta = priv;
+	unsigned int *src;
+	unsigned int *dst = (unsigned int *)(buf + sizeof(*header));
+	struct adreno_ib_object_list *ib_obj_list;
+	struct kgsl_snapshot *snapshot;
+	struct kgsl_snapshot_object *obj;
+
+	if (meta == NULL || meta->snapshot == NULL || meta->obj == NULL) {
+		KGSL_CORE_ERR("snapshot: bad metadata");
+		return 0;
+	}
+	snapshot = meta->snapshot;
+	obj = meta->obj;
+
+	if (remain < (obj->size + sizeof(*header))) {
+		KGSL_CORE_ERR("snapshot: Not enough memory for the ib\n");
+		return 0;
+	}
+
+	src = kgsl_gpuaddr_to_vaddr(&obj->entry->memdesc, obj->gpuaddr);
+	if (src == NULL) {
+		KGSL_DRV_ERR(device,
+			"snapshot: Unable to map GPU memory object 0x%016llX into the kernel\n",
+			obj->gpuaddr);
+		return 0;
+	}
+
+	/* only do this for IB1 because the IB2s are part of IB1 objects */
+	if (meta->ib1base == obj->gpuaddr) {
+
+		snapshot->ib1dumped = active_ib_is_parsed(obj->gpuaddr,
+					obj->size, obj->entry->priv);
+		if (-E2BIG == adreno_ib_create_object_list(device,
+				obj->entry->priv,
+				obj->gpuaddr, obj->size >> 2,
+				&ib_obj_list))
+			ib_max_objs = 1;
+		if (ib_obj_list) {
+			/* freeze the IB objects in the IB */
+			snapshot_freeze_obj_list(snapshot,
+						obj->entry->priv,
+						ib_obj_list, meta->ib2base);
+			adreno_ib_destroy_obj_list(ib_obj_list);
+		}
+	}
+
+
+	if (meta->ib2base == obj->gpuaddr)
+		snapshot->ib2dumped = active_ib_is_parsed(obj->gpuaddr,
+					obj->size, obj->entry->priv);
+
+	/* Write the sub-header for the section */
+	header->gpuaddr = obj->gpuaddr;
+	header->ptbase =
+		kgsl_mmu_pagetable_get_ttbr0(obj->entry->priv->pagetable);
+	header->size = obj->size >> 2;
+
+	/* Write the contents of the ib */
+	memcpy((void *)dst, (void *)src, (size_t) obj->size);
+
+	return obj->size + sizeof(*header);
+}
+
+/* Dump another item on the current pending list */
+static void dump_object(struct kgsl_device *device, int obj,
+		struct kgsl_snapshot *snapshot)
+{
+	struct snapshot_ib_meta meta;
+
+	switch (objbuf[obj].type) {
+	case SNAPSHOT_OBJ_TYPE_IB:
+		meta.snapshot = snapshot;
+		meta.obj = &objbuf[obj];
+		meta.ib1base = snapshot->ib1base;
+		meta.ib1size = snapshot->ib1size;
+		meta.ib2base = snapshot->ib2base;
+		meta.ib2size = snapshot->ib2size;
+
+		kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_IB_V2,
+			snapshot, snapshot_ib, &meta);
+		if (objbuf[obj].entry) {
+			kgsl_memdesc_unmap(&(objbuf[obj].entry->memdesc));
+			kgsl_mem_entry_put(objbuf[obj].entry);
+		}
+		break;
+	default:
+		KGSL_CORE_ERR("snapshot: Invalid snapshot object type: %d\n",
+			objbuf[obj].type);
+		break;
+	}
+}
+
+/*
+ * setup_fault_process() - Find the kgsl_process_private struct that caused
+ * the fault
+ *
+ * Find the faulting process based on what the dispatcher thinks happened and
+ * what the hardware is using for the current pagetable. The process struct
+ * will be used to look up GPU addresses that are encountered while parsing
+ * the GPU state.
+ */
+static void setup_fault_process(struct kgsl_device *device,
+				struct kgsl_snapshot *snapshot,
+				struct kgsl_process_private *process)
+{
+	u64 hw_ptbase, proc_ptbase;
+
+	if (process != NULL && !kgsl_process_private_get(process))
+		process = NULL;
+
+	/* Get the physical address of the MMU pagetable */
+	hw_ptbase = kgsl_mmu_get_current_ttbr0(&device->mmu);
+
+	/* if we have an input process, make sure the ptbases match */
+	if (process) {
+		proc_ptbase = kgsl_mmu_pagetable_get_ttbr0(process->pagetable);
+		/* agreement! No need to check further */
+		if (hw_ptbase == proc_ptbase)
+			goto done;
+
+		kgsl_process_private_put(process);
+		process = NULL;
+		KGSL_CORE_ERR("snapshot: ptbase mismatch hw %llx sw %llx\n",
+				hw_ptbase, proc_ptbase);
+	}
+
+	/* try to find the right pagetable by walking the process list */
+	if (kgsl_mmu_is_perprocess(&device->mmu)) {
+		struct kgsl_process_private *tmp;
+
+		mutex_lock(&kgsl_driver.process_mutex);
+		list_for_each_entry(tmp, &kgsl_driver.process_list, list) {
+			u64 pt_ttbr0;
+
+			pt_ttbr0 = kgsl_mmu_pagetable_get_ttbr0(tmp->pagetable);
+			if ((pt_ttbr0 == hw_ptbase)
+			    && kgsl_process_private_get(tmp)) {
+				process = tmp;
+				break;
+			}
+		}
+		mutex_unlock(&kgsl_driver.process_mutex);
+	}
+done:
+	snapshot->process = process;
+}
+
+/* Snapshot a global memory buffer */
+static size_t snapshot_global(struct kgsl_device *device, u8 *buf,
+	size_t remain, void *priv)
+{
+	struct kgsl_memdesc *memdesc = priv;
+
+	struct kgsl_snapshot_gpu_object_v2 *header =
+		(struct kgsl_snapshot_gpu_object_v2 *)buf;
+
+	u8 *ptr = buf + sizeof(*header);
+
+	if (memdesc->size == 0)
+		return 0;
+
+	if (remain < (memdesc->size + sizeof(*header))) {
+		KGSL_CORE_ERR("snapshot: Not enough memory for the memdesc\n");
+		return 0;
+	}
+
+	if (memdesc->hostptr == NULL) {
+		KGSL_CORE_ERR(
+		"snapshot: no kernel mapping for global object 0x%016llX\n",
+		memdesc->gpuaddr);
+		return 0;
+	}
+
+	header->size = memdesc->size >> 2;
+	header->gpuaddr = memdesc->gpuaddr;
+	header->ptbase = MMU_DEFAULT_TTBR0(device);
+	header->type = SNAPSHOT_GPU_OBJECT_GLOBAL;
+
+	memcpy(ptr, memdesc->hostptr, memdesc->size);
+
+	return memdesc->size + sizeof(*header);
+}
+
+/* Snapshot IOMMU specific buffers */
+static void adreno_snapshot_iommu(struct kgsl_device *device,
+		struct kgsl_snapshot *snapshot)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_iommu *iommu = KGSL_IOMMU_PRIV(device);
+
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2,
+		snapshot, snapshot_global, &iommu->setstate);
+
+	if (ADRENO_FEATURE(adreno_dev, ADRENO_PREEMPTION))
+		kgsl_snapshot_add_section(device,
+			KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2,
+			snapshot, snapshot_global, &iommu->smmu_info);
+}
+
+static void adreno_snapshot_ringbuffer(struct kgsl_device *device,
+		struct kgsl_snapshot *snapshot, struct adreno_ringbuffer *rb)
+{
+	struct snapshot_rb_params params = {
+		.snapshot = snapshot,
+		.rb = rb,
+	};
+
+	if (rb == NULL)
+		return;
+
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_RB_V2, snapshot,
+		snapshot_rb, &params);
+}
+
+/*
+ * adreno_snapshot() - Snapshot the Adreno GPU state
+ * @device: KGSL device to snapshot
+ * @snapshot: Pointer to the snapshot instance
+ * @context: Context that caused the fault, if known by the driver
+ *
+ * This is a hook function called by kgsl_snapshot to snapshot the
+ * Adreno specific information for the GPU snapshot.  In turn, this function
+ * calls the GPU specific snapshot function to get core specific information.
+ */
+void adreno_snapshot(struct kgsl_device *device, struct kgsl_snapshot *snapshot,
+			struct kgsl_context *context)
+{
+	unsigned int i;
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct adreno_gpudev *gpudev = ADRENO_GPU_DEVICE(adreno_dev);
+
+	ib_max_objs = 0;
+	/* Reset the list of objects */
+	objbufptr = 0;
+
+	snapshot_frozen_objsize = 0;
+
+	setup_fault_process(device, snapshot,
+			context ? context->proc_priv : NULL);
+
+	adreno_readreg64(adreno_dev, ADRENO_REG_CP_IB1_BASE,
+			ADRENO_REG_CP_IB1_BASE_HI, &snapshot->ib1base);
+	adreno_readreg(adreno_dev, ADRENO_REG_CP_IB1_BUFSZ, &snapshot->ib1size);
+	adreno_readreg64(adreno_dev, ADRENO_REG_CP_IB2_BASE,
+			ADRENO_REG_CP_IB2_BASE_HI, &snapshot->ib2base);
+	adreno_readreg(adreno_dev, ADRENO_REG_CP_IB2_BUFSZ, &snapshot->ib2size);
+
+	snapshot->ib1dumped = false;
+	snapshot->ib2dumped = false;
+
+	adreno_snapshot_ringbuffer(device, snapshot, adreno_dev->cur_rb);
+
+	/* Dump the prev ringbuffer */
+	if (adreno_dev->prev_rb != adreno_dev->cur_rb)
+		adreno_snapshot_ringbuffer(device, snapshot,
+			adreno_dev->prev_rb);
+
+	if ((adreno_dev->next_rb != adreno_dev->prev_rb) &&
+		 (adreno_dev->next_rb != adreno_dev->cur_rb))
+		adreno_snapshot_ringbuffer(device, snapshot,
+			adreno_dev->next_rb);
+
+	/* Add GPU specific sections - registers mainly, but other stuff too */
+	if (gpudev->snapshot)
+		gpudev->snapshot(adreno_dev, snapshot);
+
+	/* Dump selected global buffers */
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2,
+			snapshot, snapshot_global, &device->memstore);
+
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2,
+			snapshot, snapshot_global,
+			&adreno_dev->pwron_fixup);
+
+	if (kgsl_mmu_get_mmutype(device) == KGSL_MMU_TYPE_IOMMU)
+		adreno_snapshot_iommu(device, snapshot);
+
+	/*
+	 * Add a section that lists (gpuaddr, size, memtype) tuples of the
+	 * hanging process
+	 */
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_MEMLIST_V2,
+			snapshot, snapshot_capture_mem_list, snapshot->process);
+	/*
+	 * Make sure that the last IB1 that was being executed is dumped.
+	 * Since this was the last IB1 that was processed, we should have
+	 * already added it to the list during the ringbuffer parse but we
+	 * want to be double plus sure.
+	 * The problem is that the IB size from the register is the unprocessed
+	 * size of the buffer, not the original size, so if we didn't catch this
+	 * buffer being directly used in the RB, then we might not be able to
+	 * dump the whole thing. Print a warning message so we can try to
+	 * figure out how often this really happens.
+	 */
+
+	if (-ENOENT == find_object(SNAPSHOT_OBJ_TYPE_IB, snapshot->ib1base,
+			snapshot->process) && snapshot->ib1size) {
+		push_object(SNAPSHOT_OBJ_TYPE_IB, snapshot->process,
+			snapshot->ib1base, snapshot->ib1size);
+		KGSL_CORE_ERR(
+		"CP_IB1_BASE not found in the ringbuffer. Dumping %x dwords of the buffer.\n",
+		snapshot->ib1size);
+	}
+
+	/*
+	 * Add the last parsed IB2 to the list. The IB2 should be found as we
+	 * parse the objects below, but we try to add it to the list first, so
+	 * it too can be parsed.  Don't print an error message in this case - if
+	 * the IB2 is found during parsing, the list will be updated with the
+	 * correct size.
+	 */
+
+	if (-ENOENT == find_object(SNAPSHOT_OBJ_TYPE_IB, snapshot->ib2base,
+		snapshot->process)) {
+		push_object(SNAPSHOT_OBJ_TYPE_IB, snapshot->process,
+			snapshot->ib2base, snapshot->ib2size);
+	}
+
+	/*
+	 * Go through the list of found objects and dump each one.  As the IBs
+	 * are parsed, more objects might be found, and objbufptr will increase
+	 */
+	for (i = 0; i < objbufptr; i++)
+		dump_object(device, i, snapshot);
+
+	/*
+	 * In case the snapshot static blob is running out of memory, add the
+	 * active IB1 and IB2 entries to obj_list so that the active IBs can be
+	 * dumped to the snapshot dynamic blob.
+	 */
+	if (!snapshot->ib1dumped || !snapshot->ib2dumped)
+		kgsl_snapshot_add_active_ib_obj_list(device, snapshot);
+
+	if (ib_max_objs)
+		KGSL_CORE_ERR("Max objects found in IB\n");
+	if (snapshot_frozen_objsize)
+		KGSL_CORE_ERR("GPU snapshot froze %zdKb of GPU buffers\n",
+			snapshot_frozen_objsize / 1024);
+
+}
+
+/*
+ * adreno_snapshot_cp_merciu() - Dump CP MERCIU data in snapshot
+ * @device: Device being snapshotted
+ * @buf: Snapshot memory
+ * @remain: Bytes remaining in snapshot memory
+ * @priv: Size of MERCIU data in dwords
+ */
+size_t adreno_snapshot_cp_merciu(struct kgsl_device *device, u8 *buf,
+		size_t remain, void *priv)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	int i, size = *((int *)priv);
+
+	/* The MERCIU data is two dwords per entry */
+	size = size << 1;
+
+	if (remain < DEBUG_SECTION_SZ(size)) {
+		SNAPSHOT_ERR_NOMEM(device, "CP MERCIU DEBUG");
+		return 0;
+	}
+
+	header->type = SNAPSHOT_DEBUG_CP_MERCIU;
+	header->size = size;
+
+	adreno_writereg(adreno_dev, ADRENO_REG_CP_MERCIU_ADDR, 0x0);
+
+	for (i = 0; i < size; i++) {
+		adreno_readreg(adreno_dev, ADRENO_REG_CP_MERCIU_DATA,
+			&data[(i * 2)]);
+		adreno_readreg(adreno_dev, ADRENO_REG_CP_MERCIU_DATA2,
+			&data[(i * 2) + 1]);
+	}
+
+	return DEBUG_SECTION_SZ(size);
+}
+
+/*
+ * adreno_snapshot_cp_roq - Dump ROQ data in snapshot
+ * @device: Device being snapshotted
+ * @buf: Snapshot memory
+ * @remain: Bytes remaining in snapshot memory
+ * @priv: Size of ROQ data in Dwords
+ */
+size_t adreno_snapshot_cp_roq(struct kgsl_device *device, u8 *buf,
+		size_t remain, void *priv)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	int i, size = *((int *)priv);
+
+	if (remain < DEBUG_SECTION_SZ(size)) {
+		SNAPSHOT_ERR_NOMEM(device, "CP ROQ DEBUG");
+		return 0;
+	}
+
+	header->type = SNAPSHOT_DEBUG_CP_ROQ;
+	header->size = size;
+
+	adreno_writereg(adreno_dev, ADRENO_REG_CP_ROQ_ADDR, 0x0);
+	for (i = 0; i < size; i++)
+		adreno_readreg(adreno_dev, ADRENO_REG_CP_ROQ_DATA, &data[i]);
+
+	return DEBUG_SECTION_SZ(size);
+}
+
+/*
+ * adreno_snapshot_cp_pm4_ram() - Dump PM4 data in snapshot
+ * @device: Device being snapshotted
+ * @buf: Snapshot memory
+ * @remain: Number of bytes left in snapshot memory
+ * @priv: Unused
+ */
+size_t adreno_snapshot_cp_pm4_ram(struct kgsl_device *device, u8 *buf,
+		size_t remain, void *priv)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	int i;
+	size_t size = adreno_dev->pm4_fw_size - 1;
+
+	if (remain < DEBUG_SECTION_SZ(size)) {
+		SNAPSHOT_ERR_NOMEM(device, "CP PM4 RAM DEBUG");
+		return 0;
+	}
+
+	header->type = SNAPSHOT_DEBUG_CP_PM4_RAM;
+	header->size = size;
+
+	/*
+	 * Read the firmware from the GPU rather than use our cache in order to
+	 * try to catch mis-programming or corruption in the hardware.  We do
+	 * use the cached version of the size, however, instead of trying to
+	 * maintain always changing hardcoded constants
+	 */
+
+	adreno_writereg(adreno_dev, ADRENO_REG_CP_ME_RAM_RADDR, 0x0);
+	for (i = 0; i < size; i++)
+		adreno_readreg(adreno_dev, ADRENO_REG_CP_ME_RAM_DATA, &data[i]);
+
+	return DEBUG_SECTION_SZ(size);
+}
+
+/*
+ * adreno_snapshot_cp_pfp_ram() - Dump the PFP data in snapshot
+ * @device: Device being snapshotted
+ * @buf: Snapshot memory
+ * @remain: Number of bytes left in snapshot memory
+ * @priv: Unused
+ */
+size_t adreno_snapshot_cp_pfp_ram(struct kgsl_device *device, u8 *buf,
+		size_t remain, void *priv)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	int i, size = adreno_dev->pfp_fw_size - 1;
+
+	if (remain < DEBUG_SECTION_SZ(size)) {
+		SNAPSHOT_ERR_NOMEM(device, "CP PFP RAM DEBUG");
+		return 0;
+	}
+
+	header->type = SNAPSHOT_DEBUG_CP_PFP_RAM;
+	header->size = size;
+
+	/*
+	 * Read the firmware from the GPU rather than use our cache in order to
+	 * try to catch mis-programming or corruption in the hardware.  We do
+	 * use the cached version of the size, however, instead of trying to
+	 * maintain always changing hardcoded constants
+	 */
+	adreno_writereg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_ADDR, 0x0);
+	for (i = 0; i < size; i++)
+		adreno_readreg(adreno_dev, ADRENO_REG_CP_PFP_UCODE_DATA,
+				&data[i]);
+
+	return DEBUG_SECTION_SZ(size);
+}
+
+/*
+ * adreno_snapshot_vpc_memory() - Save VPC data in snapshot
+ * @device: Device being snapshotted
+ * @buf: Snapshot memory
+ * @remain: Number of bytes left in snapshot memory
+ * @priv: Private data for VPC if any
+ */
+size_t adreno_snapshot_vpc_memory(struct kgsl_device *device, u8 *buf,
+		size_t remain, void *priv)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	int vpc_mem_size = *((int *)priv);
+	size_t size = VPC_MEMORY_BANKS * vpc_mem_size;
+	int bank, addr, i = 0;
+
+	if (remain < DEBUG_SECTION_SZ(size)) {
+		SNAPSHOT_ERR_NOMEM(device, "VPC MEMORY");
+		return 0;
+	}
+
+	header->type = SNAPSHOT_DEBUG_VPC_MEMORY;
+	header->size = size;
+
+	for (bank = 0; bank < VPC_MEMORY_BANKS; bank++) {
+		for (addr = 0; addr < vpc_mem_size; addr++) {
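+			/* bank in the low bits, row address starts at bit 4 */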
+			unsigned int val = bank | (addr << 4);
+
+			adreno_writereg(adreno_dev,
+				ADRENO_REG_VPC_DEBUG_RAM_SEL, val);
+			adreno_readreg(adreno_dev,
+				ADRENO_REG_VPC_DEBUG_RAM_READ, &data[i++]);
+		}
+	}
+
+	return DEBUG_SECTION_SZ(size);
+}
+
+/*
+ * adreno_snapshot_cp_meq() - Save CP MEQ data in snapshot
+ * @device: Device being snapshotted
+ * @buf: Snapshot memory
+ * @remain: Number of bytes left in snapshot memory
+ * @priv: Contains the size of MEQ data
+ */
+size_t adreno_snapshot_cp_meq(struct kgsl_device *device, u8 *buf,
+		size_t remain, void *priv)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_snapshot_debug *header = (struct kgsl_snapshot_debug *)buf;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	int i;
+	int cp_meq_sz = *((int *)priv);
+
+	if (remain < DEBUG_SECTION_SZ(cp_meq_sz)) {
+		SNAPSHOT_ERR_NOMEM(device, "CP MEQ DEBUG");
+		return 0;
+	}
+
+	header->type = SNAPSHOT_DEBUG_CP_MEQ;
+	header->size = cp_meq_sz;
+
+	adreno_writereg(adreno_dev, ADRENO_REG_CP_MEQ_ADDR, 0x0);
+	for (i = 0; i < cp_meq_sz; i++)
+		adreno_readreg(adreno_dev, ADRENO_REG_CP_MEQ_DATA, &data[i]);
+
+	return DEBUG_SECTION_SZ(cp_meq_sz);
+}
+
+static const struct adreno_vbif_snapshot_registers *vbif_registers(
+		struct adreno_device *adreno_dev,
+		const struct adreno_vbif_snapshot_registers *list,
+		unsigned int count)
+{
+	unsigned int version;
+	unsigned int i;
+
+	adreno_readreg(adreno_dev, ADRENO_REG_VBIF_VERSION, &version);
+
+	for (i = 0; i < count; i++) {
+		if ((list[i].version & list[i].mask) ==
+				(version & list[i].mask))
+			return &list[i];
+	}
+
+	KGSL_CORE_ERR(
+		"snapshot: Registers for VBIF version %X were not dumped\n",
+		version);
+
+	return NULL;
+}
+
+void adreno_snapshot_registers(struct kgsl_device *device,
+		struct kgsl_snapshot *snapshot,
+		const unsigned int *regs, unsigned int count)
+{
+	struct kgsl_snapshot_registers r;
+
+	r.regs = regs;
+	r.count = count;
+
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS, snapshot,
+		kgsl_snapshot_dump_registers, &r);
+}
+
+void adreno_snapshot_vbif_registers(struct kgsl_device *device,
+		struct kgsl_snapshot *snapshot,
+		const struct adreno_vbif_snapshot_registers *list,
+		unsigned int count)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_snapshot_registers regs;
+	const struct adreno_vbif_snapshot_registers *vbif;
+
+	vbif = vbif_registers(adreno_dev, list, count);
+
+	if (vbif != NULL) {
+		regs.regs = vbif->registers;
+		regs.count = vbif->count;
+
+		kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS,
+			snapshot, kgsl_snapshot_dump_registers, &regs);
+	}
+}
diff --git a/drivers/gpu/msm/adreno_snapshot.h b/drivers/gpu/msm/adreno_snapshot.h
new file mode 100644
index 0000000..6af0507
--- /dev/null
+++ b/drivers/gpu/msm/adreno_snapshot.h
@@ -0,0 +1,58 @@
+/* Copyright (c) 2013-2015, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef __ADRENO_SNAPSHOT_H
+#define __ADRENO_SNAPSHOT_H
+
+#include "kgsl_snapshot.h"
+
+#define CP_CRASH_DUMPER_TIMEOUT 1000
+
+#define DEBUG_SECTION_SZ(_dwords) (((_dwords) * sizeof(unsigned int)) \
+		+ sizeof(struct kgsl_snapshot_debug))
+
+#define SHADER_SECTION_SZ(_dwords) (((_dwords) * sizeof(unsigned int)) \
+		+ sizeof(struct kgsl_snapshot_shader))
+
+/* Section sizes for A320 */
+#define A320_SNAPSHOT_CP_STATE_SECTION_SIZE	0x2e
+#define A320_SNAPSHOT_ROQ_SECTION_SIZE		512
+#define A320_SNAPSHOT_CP_MERCIU_SECTION_SIZE	32
+
+/* Macro to make it super easy to dump registers */
+#define SNAPSHOT_REGISTERS(_d, _s, _r) \
+	adreno_snapshot_registers((_d), (_s), \
+		(unsigned int *) _r, ARRAY_SIZE(_r) /  2)
+
+size_t adreno_snapshot_cp_merciu(struct kgsl_device *device, u8 *buf,
+		size_t remain, void *priv);
+size_t adreno_snapshot_cp_roq(struct kgsl_device *device, u8 *buf,
+		size_t remain, void *priv);
+size_t adreno_snapshot_cp_pm4_ram(struct kgsl_device *device, u8 *buf,
+		size_t remain, void *priv);
+size_t adreno_snapshot_cp_pfp_ram(struct kgsl_device *device, u8 *buf,
+		size_t remain, void *priv);
+size_t adreno_snapshot_cp_meq(struct kgsl_device *device, u8 *buf,
+		size_t remain, void *priv);
+size_t adreno_snapshot_vpc_memory(struct kgsl_device *device, u8 *buf,
+		size_t remain, void *priv);
+
+void adreno_snapshot_registers(struct kgsl_device *device,
+		struct kgsl_snapshot *snapshot,
+		const unsigned int *regs, unsigned int count);
+
+void adreno_snapshot_vbif_registers(struct kgsl_device *device,
+		struct kgsl_snapshot *snapshot,
+		const struct adreno_vbif_snapshot_registers *list,
+		unsigned int count);
+
+#endif /* __ADRENO_SNAPSHOT_H */
diff --git a/drivers/gpu/msm/adreno_sysfs.c b/drivers/gpu/msm/adreno_sysfs.c
new file mode 100644
index 0000000..2d8be7a
--- /dev/null
+++ b/drivers/gpu/msm/adreno_sysfs.c
@@ -0,0 +1,509 @@
+/* Copyright (c) 2014-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/sysfs.h>
+#include <linux/device.h>
+
+#include "kgsl_device.h"
+#include "adreno.h"
+
+struct adreno_sysfs_attribute {
+	struct device_attribute attr;
+	unsigned int (*show)(struct adreno_device *adreno_dev);
+	int (*store)(struct adreno_device *adreno_dev, unsigned int val);
+};
+
+#define _ADRENO_SYSFS_ATTR(_name, __show, __store) \
+struct adreno_sysfs_attribute adreno_attr_##_name = { \
+	.attr = __ATTR(_name, 0644, __show, __store), \
+	.show = _ ## _name ## _show, \
+	.store = _ ## _name ## _store, \
+}
+
+#define ADRENO_SYSFS_ATTR(_a) \
+	container_of((_a), struct adreno_sysfs_attribute, attr)
+
+static struct adreno_device *_get_adreno_dev(struct device *dev)
+{
+	struct kgsl_device *device = kgsl_device_from_dev(dev);
+
+	return device ? ADRENO_DEVICE(device) : NULL;
+}
+
+static int _ft_policy_store(struct adreno_device *adreno_dev,
+		unsigned int val)
+{
+	adreno_dev->ft_policy = val & KGSL_FT_POLICY_MASK;
+	return 0;
+}
+
+static unsigned int _ft_policy_show(struct adreno_device *adreno_dev)
+{
+	return adreno_dev->ft_policy;
+}
+
+static int _ft_pagefault_policy_store(struct adreno_device *adreno_dev,
+		unsigned int val)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret = 0;
+
+	mutex_lock(&device->mutex);
+	val &= KGSL_FT_PAGEFAULT_MASK;
+
+	if (test_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv))
+		ret = kgsl_mmu_set_pagefault_policy(&device->mmu,
+			(unsigned long) val);
+
+	if (ret == 0)
+		adreno_dev->ft_pf_policy = val;
+
+	mutex_unlock(&device->mutex);
+
+	return 0;
+}
+
+static unsigned int _ft_pagefault_policy_show(struct adreno_device *adreno_dev)
+{
+	return adreno_dev->ft_pf_policy;
+}
+
+static int _ft_fast_hang_detect_store(struct adreno_device *adreno_dev,
+		unsigned int val)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (!test_bit(ADRENO_DEVICE_SOFT_FAULT_DETECT, &adreno_dev->priv))
+		return 0;
+
+	mutex_lock(&device->mutex);
+
+	if (val) {
+		if (!kgsl_active_count_get(device)) {
+			adreno_fault_detect_start(adreno_dev);
+			kgsl_active_count_put(device);
+		}
+	} else {
+		adreno_fault_detect_stop(adreno_dev);
+	}
+
+	mutex_unlock(&device->mutex);
+
+	return 0;
+}
+
+static unsigned int _ft_fast_hang_detect_show(struct adreno_device *adreno_dev)
+{
+	return adreno_dev->fast_hang_detect;
+}
+
+static int _ft_long_ib_detect_store(struct adreno_device *adreno_dev,
+		unsigned int val)
+{
+	adreno_dev->long_ib_detect = val;
+	return 0;
+}
+
+static unsigned int _ft_long_ib_detect_show(struct adreno_device *adreno_dev)
+{
+	return adreno_dev->long_ib_detect;
+}
+
+static int _ft_hang_intr_status_store(struct adreno_device *adreno_dev,
+		unsigned int val)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret = 0;
+
+	if (val == test_bit(ADRENO_DEVICE_HANG_INTR, &adreno_dev->priv))
+		return 0;
+
+	mutex_lock(&device->mutex);
+	change_bit(ADRENO_DEVICE_HANG_INTR, &adreno_dev->priv);
+
+	if (test_bit(ADRENO_DEVICE_STARTED, &adreno_dev->priv)) {
+		kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE);
+		adreno_irqctrl(adreno_dev, 1);
+	} else if (device->state == KGSL_STATE_INIT) {
+		ret = -EACCES;
+		change_bit(ADRENO_DEVICE_HANG_INTR, &adreno_dev->priv);
+	}
+
+	mutex_unlock(&device->mutex);
+	return ret;
+}
+
+static unsigned int _ft_hang_intr_status_show(struct adreno_device *adreno_dev)
+{
+	return test_bit(ADRENO_DEVICE_HANG_INTR, &adreno_dev->priv);
+}
+
+static int _pwrctrl_store(struct adreno_device *adreno_dev,
+		unsigned int val, unsigned int flag)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (val == test_bit(flag, &adreno_dev->pwrctrl_flag))
+		return 0;
+
+	mutex_lock(&device->mutex);
+
+	/* Power down the GPU before changing the state */
+	kgsl_pwrctrl_change_state(device, KGSL_STATE_SUSPEND);
+	change_bit(flag, &adreno_dev->pwrctrl_flag);
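+	/* The new setting is picked up the next time the GPU powers back up */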
+	kgsl_pwrctrl_change_state(device, KGSL_STATE_SLUMBER);
+
+	mutex_unlock(&device->mutex);
+
+	return 0;
+}
+
+static int _preemption_store(struct adreno_device *adreno_dev,
+		unsigned int val)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (test_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv) == val)
+		return 0;
+
+	mutex_lock(&device->mutex);
+
+	kgsl_pwrctrl_change_state(device, KGSL_STATE_SUSPEND);
+	change_bit(ADRENO_DEVICE_PREEMPTION, &adreno_dev->priv);
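+	/* Fall back to the default ringbuffer whenever preemption is toggled */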
+	adreno_dev->cur_rb = &(adreno_dev->ringbuffers[0]);
+	kgsl_pwrctrl_change_state(device, KGSL_STATE_SLUMBER);
+
+	mutex_unlock(&device->mutex);
+
+	return 0;
+}
+
+static unsigned int _preemption_show(struct adreno_device *adreno_dev)
+{
+	return adreno_is_preemption_enabled(adreno_dev);
+}
+
+static int _hwcg_store(struct adreno_device *adreno_dev,
+		unsigned int val)
+{
+	return _pwrctrl_store(adreno_dev, val, ADRENO_HWCG_CTRL);
+}
+
+static unsigned int _hwcg_show(struct adreno_device *adreno_dev)
+{
+	return test_bit(ADRENO_HWCG_CTRL, &adreno_dev->pwrctrl_flag);
+}
+
+static int _throttling_store(struct adreno_device *adreno_dev,
+	unsigned int val)
+{
+	return _pwrctrl_store(adreno_dev, val, ADRENO_THROTTLING_CTRL);
+}
+
+static unsigned int _throttling_show(struct adreno_device *adreno_dev)
+{
+	return test_bit(ADRENO_THROTTLING_CTRL, &adreno_dev->pwrctrl_flag);
+}
+
+static int _sptp_pc_store(struct adreno_device *adreno_dev,
+		unsigned int val)
+{
+	return _pwrctrl_store(adreno_dev, val, ADRENO_SPTP_PC_CTRL);
+}
+
+static unsigned int _sptp_pc_show(struct adreno_device *adreno_dev)
+{
+	return test_bit(ADRENO_SPTP_PC_CTRL, &adreno_dev->pwrctrl_flag);
+}
+
+static int _lm_store(struct adreno_device *adreno_dev, unsigned int val)
+{
+	return _pwrctrl_store(adreno_dev, val, ADRENO_LM_CTRL);
+}
+
+static unsigned int _lm_show(struct adreno_device *adreno_dev)
+{
+	return test_bit(ADRENO_LM_CTRL, &adreno_dev->pwrctrl_flag);
+}
+
+static ssize_t _sysfs_store_u32(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf, size_t count)
+{
+	struct adreno_device *adreno_dev = _get_adreno_dev(dev);
+	struct adreno_sysfs_attribute *_attr = ADRENO_SYSFS_ATTR(attr);
+	unsigned int val = 0;
+	int ret;
+
+	if (adreno_dev == NULL)
+		return 0;
+
+	ret = kgsl_sysfs_store(buf, &val);
+
+	if (!ret && _attr->store)
+		ret = _attr->store(adreno_dev, val);
+
+	return (ssize_t) ret < 0 ? ret : count;
+}
+
+static ssize_t _sysfs_show_u32(struct device *dev,
+		struct device_attribute *attr,
+		char *buf)
+{
+	struct adreno_device *adreno_dev = _get_adreno_dev(dev);
+	struct adreno_sysfs_attribute *_attr = ADRENO_SYSFS_ATTR(attr);
+	unsigned int val = 0;
+
+	if (adreno_dev == NULL)
+		return 0;
+
+	if (_attr->show)
+		val = _attr->show(adreno_dev);
+
+	return snprintf(buf, PAGE_SIZE, "0x%X\n", val);
+}
+
+static ssize_t _sysfs_store_bool(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf, size_t count)
+{
+	struct adreno_device *adreno_dev = _get_adreno_dev(dev);
+	struct adreno_sysfs_attribute *_attr = ADRENO_SYSFS_ATTR(attr);
+	unsigned int val = 0;
+	int ret;
+
+	if (adreno_dev == NULL)
+		return 0;
+
+	ret = kgsl_sysfs_store(buf, &val);
+
+	if (!ret && _attr->store)
+		ret = _attr->store(adreno_dev, val ? 1 : 0);
+
+	return (ssize_t) ret < 0 ? ret : count;
+}
+
+static ssize_t _sysfs_show_bool(struct device *dev,
+		struct device_attribute *attr,
+		char *buf)
+{
+	struct adreno_device *adreno_dev = _get_adreno_dev(dev);
+	struct adreno_sysfs_attribute *_attr = ADRENO_SYSFS_ATTR(attr);
+	unsigned int val = 0;
+
+	if (adreno_dev == NULL)
+		return 0;
+
+	if (_attr->show)
+		val = _attr->show(adreno_dev);
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", val);
+}
+
+#define ADRENO_SYSFS_BOOL(_name) \
+	_ADRENO_SYSFS_ATTR(_name, _sysfs_show_bool, _sysfs_store_bool)
+
+#define ADRENO_SYSFS_U32(_name) \
+	_ADRENO_SYSFS_ATTR(_name, _sysfs_show_u32, _sysfs_store_u32)
+
+static ADRENO_SYSFS_U32(ft_policy);
+static ADRENO_SYSFS_U32(ft_pagefault_policy);
+static ADRENO_SYSFS_BOOL(ft_fast_hang_detect);
+static ADRENO_SYSFS_BOOL(ft_long_ib_detect);
+static ADRENO_SYSFS_BOOL(ft_hang_intr_status);
+
+static DEVICE_INT_ATTR(wake_nice, 0644, adreno_wake_nice);
+static DEVICE_INT_ATTR(wake_timeout, 0644, adreno_wake_timeout);
+
+static ADRENO_SYSFS_BOOL(sptp_pc);
+static ADRENO_SYSFS_BOOL(lm);
+static ADRENO_SYSFS_BOOL(preemption);
+static ADRENO_SYSFS_BOOL(hwcg);
+static ADRENO_SYSFS_BOOL(throttling);
+
+static const struct device_attribute *_attr_list[] = {
+	&adreno_attr_ft_policy.attr,
+	&adreno_attr_ft_pagefault_policy.attr,
+	&adreno_attr_ft_fast_hang_detect.attr,
+	&adreno_attr_ft_long_ib_detect.attr,
+	&adreno_attr_ft_hang_intr_status.attr,
+	&dev_attr_wake_nice.attr,
+	&dev_attr_wake_timeout.attr,
+	&adreno_attr_sptp_pc.attr,
+	&adreno_attr_lm.attr,
+	&adreno_attr_preemption.attr,
+	&adreno_attr_hwcg.attr,
+	&adreno_attr_throttling.attr,
+	NULL,
+};
+
+/* Add a ppd directory for controlling different knobs from sysfs */
+struct adreno_ppd_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct kgsl_device *device, char *buf);
+	ssize_t (*store)(struct kgsl_device *device, const char *buf,
+		size_t count);
+};
+
+#define PPD_ATTR(_name, _mode, _show, _store) \
+struct adreno_ppd_attribute attr_##_name = { \
+	.attr = { .name = __stringify(_name), .mode = _mode }, \
+	.show = _show, \
+	.store = _store, \
+}
+
+#define to_ppd_attr(a) \
+container_of((a), struct adreno_ppd_attribute, attr)
+
+#define kobj_to_device(a) \
+container_of((a), struct kgsl_device, ppd_kobj)
+
+static ssize_t ppd_enable_store(struct kgsl_device *device,
+				const char *buf, size_t count)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	unsigned int ppd_on = 0;
+	int ret;
+
+	if (!adreno_is_a430v2(adreno_dev) ||
+		!ADRENO_FEATURE(adreno_dev, ADRENO_PPD))
+		return count;
+
+	ret = kgsl_sysfs_store(buf, &ppd_on);
+	if (ret < 0)
+		return ret;
+
+	ppd_on = (ppd_on) ? 1 : 0;
+
+	if (ppd_on == test_bit(ADRENO_PPD_CTRL, &adreno_dev->pwrctrl_flag))
+		return count;
+
+	mutex_lock(&device->mutex);
+
+	kgsl_pwrctrl_change_state(device, KGSL_STATE_SUSPEND);
+	change_bit(ADRENO_PPD_CTRL, &adreno_dev->pwrctrl_flag);
+	kgsl_pwrctrl_change_state(device, KGSL_STATE_SLUMBER);
+
+	mutex_unlock(&device->mutex);
+	return count;
+}
+
+static ssize_t ppd_enable_show(struct kgsl_device *device,
+					char *buf)
+{
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	return snprintf(buf, PAGE_SIZE, "%u\n",
+		test_bit(ADRENO_PPD_CTRL, &adreno_dev->pwrctrl_flag));
+}
+/* Add individual ppd attributes here */
+static PPD_ATTR(enable, 0644, ppd_enable_show, ppd_enable_store);
+
+static ssize_t ppd_sysfs_show(struct kobject *kobj,
+	struct attribute *attr, char *buf)
+{
+	struct adreno_ppd_attribute *pattr = to_ppd_attr(attr);
+	struct kgsl_device *device = kobj_to_device(kobj);
+	ssize_t ret = -EIO;
+
+	if (device != NULL && pattr->show != NULL)
+		ret = pattr->show(device, buf);
+
+	return ret;
+}
+
+static ssize_t ppd_sysfs_store(struct kobject *kobj,
+	struct attribute *attr, const char *buf, size_t count)
+{
+	struct adreno_ppd_attribute *pattr = to_ppd_attr(attr);
+	struct kgsl_device *device = kobj_to_device(kobj);
+	ssize_t ret = -EIO;
+
+	if (device != NULL && pattr->store != NULL)
+		ret = pattr->store(device, buf, count);
+
+	return ret;
+}
+
+static const struct sysfs_ops ppd_sysfs_ops = {
+	.show = ppd_sysfs_show,
+	.store = ppd_sysfs_store,
+};
+
+static struct kobj_type ktype_ppd = {
+	.sysfs_ops = &ppd_sysfs_ops,
+};
+
+static void ppd_sysfs_close(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_PPD))
+		return;
+
+	sysfs_remove_file(&device->ppd_kobj, &attr_enable.attr);
+	kobject_put(&device->ppd_kobj);
+}
+
+static int ppd_sysfs_init(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	if (!ADRENO_FEATURE(adreno_dev, ADRENO_PPD))
+		return -ENODEV;
+
+	ret = kobject_init_and_add(&device->ppd_kobj, &ktype_ppd,
+		&device->dev->kobj, "ppd");
+
+	if (ret == 0)
+		ret = sysfs_create_file(&device->ppd_kobj, &attr_enable.attr);
+
+	return ret;
+}
+
+/**
+ * adreno_sysfs_close() - Take down the adreno sysfs files
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Take down the sysfs files when the device goes away
+ */
+void adreno_sysfs_close(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+
+	ppd_sysfs_close(adreno_dev);
+	kgsl_remove_device_sysfs_files(device->dev, _attr_list);
+}
+
+/**
+ * adreno_sysfs_init() - Initialize adreno sysfs files
+ * @adreno_dev: Pointer to the adreno device
+ *
+ * Initialize the adreno specific sysfs files, especially those for fault
+ * tolerance and power control
+ */
+int adreno_sysfs_init(struct adreno_device *adreno_dev)
+{
+	struct kgsl_device *device = KGSL_DEVICE(adreno_dev);
+	int ret;
+
+	ret = kgsl_create_device_sysfs_files(device->dev, _attr_list);
+
+	/* Add the PPD directory and files */
+	if (ret == 0)
+		ppd_sysfs_init(adreno_dev);
+
+	return 0;
+}
diff --git a/drivers/gpu/msm/adreno_trace.c b/drivers/gpu/msm/adreno_trace.c
new file mode 100644
index 0000000..20a7210
--- /dev/null
+++ b/drivers/gpu/msm/adreno_trace.c
@@ -0,0 +1,21 @@
+/* Copyright (c) 2013-2014, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include "adreno.h"
+
+/* Instantiate tracepoints */
+#define CREATE_TRACE_POINTS
+#include "a3xx_reg.h"
+#include "a4xx_reg.h"
+#include "a5xx_reg.h"
+#include "adreno_trace.h"
diff --git a/drivers/gpu/msm/adreno_trace.h b/drivers/gpu/msm/adreno_trace.h
new file mode 100644
index 0000000..7bc4c93
--- /dev/null
+++ b/drivers/gpu/msm/adreno_trace.h
@@ -0,0 +1,586 @@
+/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#if !defined(_ADRENO_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _ADRENO_TRACE_H
+
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kgsl
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE adreno_trace
+
+#include <linux/tracepoint.h>
+#include "adreno_a3xx.h"
+#include "adreno_a4xx.h"
+#include "adreno_a5xx.h"
+
+TRACE_EVENT(adreno_cmdbatch_queued,
+	TP_PROTO(struct kgsl_drawobj *drawobj, unsigned int queued),
+	TP_ARGS(drawobj, queued),
+	TP_STRUCT__entry(
+		__field(unsigned int, id)
+		__field(unsigned int, timestamp)
+		__field(unsigned int, queued)
+		__field(unsigned int, flags)
+		__field(unsigned int, prio)
+	),
+	TP_fast_assign(
+		__entry->id = drawobj->context->id;
+		__entry->timestamp = drawobj->timestamp;
+		__entry->queued = queued;
+		__entry->flags = drawobj->flags;
+		__entry->prio = drawobj->context->priority;
+	),
+	TP_printk(
+		"ctx=%u ctx_prio=%u ts=%u queued=%u flags=%s",
+			__entry->id, __entry->prio,
+			__entry->timestamp, __entry->queued,
+			__entry->flags ? __print_flags(__entry->flags, "|",
+						KGSL_DRAWOBJ_FLAGS) : "none"
+	)
+);
+
+TRACE_EVENT(adreno_cmdbatch_submitted,
+	TP_PROTO(struct kgsl_drawobj *drawobj, int inflight, uint64_t ticks,
+		unsigned long secs, unsigned long usecs,
+		struct adreno_ringbuffer *rb, unsigned int rptr),
+	TP_ARGS(drawobj, inflight, ticks, secs, usecs, rb, rptr),
+	TP_STRUCT__entry(
+		__field(unsigned int, id)
+		__field(unsigned int, timestamp)
+		__field(int, inflight)
+		__field(unsigned int, flags)
+		__field(uint64_t, ticks)
+		__field(unsigned long, secs)
+		__field(unsigned long, usecs)
+		__field(int, prio)
+		__field(int, rb_id)
+		__field(unsigned int, rptr)
+		__field(unsigned int, wptr)
+		__field(int, q_inflight)
+	),
+	TP_fast_assign(
+		__entry->id = drawobj->context->id;
+		__entry->timestamp = drawobj->timestamp;
+		__entry->inflight = inflight;
+		__entry->flags = drawobj->flags;
+		__entry->ticks = ticks;
+		__entry->secs = secs;
+		__entry->usecs = usecs;
+		__entry->prio = drawobj->context->priority;
+		__entry->rb_id = rb->id;
+		__entry->rptr = rptr;
+		__entry->wptr = rb->wptr;
+		__entry->q_inflight = rb->dispatch_q.inflight;
+	),
+	TP_printk(
+		"ctx=%u ctx_prio=%d ts=%u inflight=%d flags=%s ticks=%lld time=%lu.%0lu rb_id=%d r/w=%x/%x, q_inflight=%d",
+			__entry->id, __entry->prio, __entry->timestamp,
+			__entry->inflight,
+			__entry->flags ? __print_flags(__entry->flags, "|",
+				KGSL_DRAWOBJ_FLAGS) : "none",
+			__entry->ticks, __entry->secs, __entry->usecs,
+			__entry->rb_id, __entry->rptr, __entry->wptr,
+			__entry->q_inflight
+	)
+);
+
+TRACE_EVENT(adreno_cmdbatch_retired,
+	TP_PROTO(struct kgsl_drawobj *drawobj, int inflight,
+		uint64_t start, uint64_t retire,
+		struct adreno_ringbuffer *rb, unsigned int rptr,
+		unsigned long fault_recovery),
+	TP_ARGS(drawobj, inflight, start, retire, rb, rptr, fault_recovery),
+	TP_STRUCT__entry(
+		__field(unsigned int, id)
+		__field(unsigned int, timestamp)
+		__field(int, inflight)
+		__field(unsigned int, recovery)
+		__field(unsigned int, flags)
+		__field(uint64_t, start)
+		__field(uint64_t, retire)
+		__field(int, prio)
+		__field(int, rb_id)
+		__field(unsigned int, rptr)
+		__field(unsigned int, wptr)
+		__field(int, q_inflight)
+		__field(unsigned long, fault_recovery)
+	),
+	TP_fast_assign(
+		__entry->id = drawobj->context->id;
+		__entry->timestamp = drawobj->timestamp;
+		__entry->inflight = inflight;
+		__entry->recovery = fault_recovery;
+		__entry->flags = drawobj->flags;
+		__entry->start = start;
+		__entry->retire = retire;
+		__entry->prio = drawobj->context->priority;
+		__entry->rb_id = rb->id;
+		__entry->rptr = rptr;
+		__entry->wptr = rb->wptr;
+		__entry->q_inflight = rb->dispatch_q.inflight;
+	),
+	TP_printk(
+		"ctx=%u ctx_prio=%d ts=%u inflight=%d recovery=%s flags=%s start=%lld retire=%lld rb_id=%d, r/w=%x/%x, q_inflight=%d",
+			__entry->id, __entry->prio, __entry->timestamp,
+			__entry->inflight,
+			__entry->recovery ?
+				__print_flags(__entry->recovery, "|",
+				ADRENO_FT_TYPES) : "none",
+			__entry->flags ? __print_flags(__entry->flags, "|",
+				KGSL_DRAWOBJ_FLAGS) : "none",
+			__entry->start,
+			__entry->retire,
+			__entry->rb_id, __entry->rptr, __entry->wptr,
+			__entry->q_inflight
+	)
+);
+
+TRACE_EVENT(adreno_cmdbatch_fault,
+	TP_PROTO(struct kgsl_drawobj_cmd *cmdobj, unsigned int fault),
+	TP_ARGS(cmdobj, fault),
+	TP_STRUCT__entry(
+		__field(unsigned int, id)
+		__field(unsigned int, timestamp)
+		__field(unsigned int, fault)
+	),
+	TP_fast_assign(
+		__entry->id = cmdobj->base.context->id;
+		__entry->timestamp = cmdobj->base.timestamp;
+		__entry->fault = fault;
+	),
+	TP_printk(
+		"ctx=%u ts=%u type=%s",
+			__entry->id, __entry->timestamp,
+			__print_symbolic(__entry->fault,
+				{ 0, "none" },
+				{ ADRENO_SOFT_FAULT, "soft" },
+				{ ADRENO_HARD_FAULT, "hard" },
+				{ ADRENO_TIMEOUT_FAULT, "timeout" })
+	)
+);
+
+TRACE_EVENT(adreno_cmdbatch_recovery,
+	TP_PROTO(struct kgsl_drawobj_cmd *cmdobj, unsigned int action),
+	TP_ARGS(cmdobj, action),
+	TP_STRUCT__entry(
+		__field(unsigned int, id)
+		__field(unsigned int, timestamp)
+		__field(unsigned int, action)
+	),
+	TP_fast_assign(
+		__entry->id = cmdobj->base.context->id;
+		__entry->timestamp = cmdobj->base.timestamp;
+		__entry->action = action;
+	),
+	TP_printk(
+		"ctx=%u ts=%u action=%s",
+			__entry->id, __entry->timestamp,
+			__print_symbolic(__entry->action, ADRENO_FT_TYPES)
+	)
+);
+
+DECLARE_EVENT_CLASS(adreno_drawctxt_template,
+	TP_PROTO(struct adreno_context *drawctxt),
+	TP_ARGS(drawctxt),
+	TP_STRUCT__entry(
+		__field(unsigned int, id)
+		__field(unsigned int, priority)
+	),
+	TP_fast_assign(
+		__entry->id = drawctxt->base.id;
+		__entry->priority = drawctxt->base.priority;
+	),
+	TP_printk("ctx=%u priority=%u", __entry->id, __entry->priority)
+);
+
+DEFINE_EVENT(adreno_drawctxt_template, adreno_drawctxt_sleep,
+	TP_PROTO(struct adreno_context *drawctxt),
+	TP_ARGS(drawctxt)
+);
+
+DEFINE_EVENT(adreno_drawctxt_template, adreno_drawctxt_wake,
+	TP_PROTO(struct adreno_context *drawctxt),
+	TP_ARGS(drawctxt)
+);
+
+DEFINE_EVENT(adreno_drawctxt_template, dispatch_queue_context,
+	TP_PROTO(struct adreno_context *drawctxt),
+	TP_ARGS(drawctxt)
+);
+
+DEFINE_EVENT(adreno_drawctxt_template, adreno_drawctxt_invalidate,
+	TP_PROTO(struct adreno_context *drawctxt),
+	TP_ARGS(drawctxt)
+);
+
+TRACE_EVENT(adreno_drawctxt_wait_start,
+	TP_PROTO(unsigned int rb_id, unsigned int ctx_id, unsigned int ts),
+	TP_ARGS(rb_id, ctx_id, ts),
+	TP_STRUCT__entry(
+		__field(unsigned int, rb_id)
+		__field(unsigned int, ctx_id)
+		__field(unsigned int, ts)
+	),
+	TP_fast_assign(
+		__entry->rb_id = rb_id;
+		__entry->ctx_id = ctx_id;
+		__entry->ts = ts;
+	),
+	TP_printk(
+		"rb=%u ctx=%u ts=%u",
+			__entry->rb_id, __entry->ctx_id, __entry->ts
+	)
+);
+
+TRACE_EVENT(adreno_drawctxt_wait_done,
+	TP_PROTO(unsigned int rb_id, unsigned int ctx_id,
+			unsigned int ts, int status),
+	TP_ARGS(rb_id, ctx_id, ts, status),
+	TP_STRUCT__entry(
+		__field(unsigned int, rb_id)
+		__field(unsigned int, ctx_id)
+		__field(unsigned int, ts)
+		__field(int, status)
+	),
+	TP_fast_assign(
+		__entry->rb_id = rb_id;
+		__entry->ctx_id = ctx_id;
+		__entry->ts = ts;
+		__entry->status = status;
+	),
+	TP_printk(
+		"rb=%u ctx=%u ts=%u status=%d",
+		__entry->rb_id, __entry->ctx_id, __entry->ts, __entry->status
+	)
+);
+
+TRACE_EVENT(adreno_drawctxt_switch,
+	TP_PROTO(struct adreno_ringbuffer *rb,
+		struct adreno_context *newctx),
+	TP_ARGS(rb, newctx),
+	TP_STRUCT__entry(
+		__field(int, rb_level)
+		__field(unsigned int, oldctx)
+		__field(unsigned int, newctx)
+	),
+	TP_fast_assign(
+		__entry->rb_level = rb->id;
+		__entry->oldctx = rb->drawctxt_active ?
+			rb->drawctxt_active->base.id : 0;
+		__entry->newctx = newctx ? newctx->base.id : 0;
+	),
+	TP_printk(
+		"rb level=%d oldctx=%u newctx=%u",
+		__entry->rb_level, __entry->oldctx, __entry->newctx
+	)
+);
+
+TRACE_EVENT(adreno_gpu_fault,
+	TP_PROTO(unsigned int ctx, unsigned int ts,
+		unsigned int status, unsigned int rptr, unsigned int wptr,
+		unsigned int ib1base, unsigned int ib1size,
+		unsigned int ib2base, unsigned int ib2size, int rb_id),
+	TP_ARGS(ctx, ts, status, rptr, wptr, ib1base, ib1size, ib2base,
+		ib2size, rb_id),
+	TP_STRUCT__entry(
+		__field(unsigned int, ctx)
+		__field(unsigned int, ts)
+		__field(unsigned int, status)
+		__field(unsigned int, rptr)
+		__field(unsigned int, wptr)
+		__field(unsigned int, ib1base)
+		__field(unsigned int, ib1size)
+		__field(unsigned int, ib2base)
+		__field(unsigned int, ib2size)
+		__field(int, rb_id)
+	),
+	TP_fast_assign(
+		__entry->ctx = ctx;
+		__entry->ts = ts;
+		__entry->status = status;
+		__entry->rptr = rptr;
+		__entry->wptr = wptr;
+		__entry->ib1base = ib1base;
+		__entry->ib1size = ib1size;
+		__entry->ib2base = ib2base;
+		__entry->ib2size = ib2size;
+		__entry->rb_id = rb_id;
+	),
+	TP_printk(
+		"ctx=%d ts=%d rb_id=%d status=%X RB=%X/%X IB1=%X/%X IB2=%X/%X",
+		__entry->ctx, __entry->ts, __entry->rb_id, __entry->status,
+		__entry->wptr, __entry->rptr, __entry->ib1base,
+		__entry->ib1size, __entry->ib2base, __entry->ib2size)
+);
+
+TRACE_EVENT(adreno_sp_tp,
+
+	TP_PROTO(unsigned long ip),
+
+	TP_ARGS(ip),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, ip)
+	),
+
+	TP_fast_assign(
+		__entry->ip = ip;
+	),
+
+	TP_printk(
+		"func=%pf", (void *) __entry->ip
+	)
+);
+
+/*
+ * Tracepoint for a3xx irq. Includes status info
+ */
+TRACE_EVENT(kgsl_a3xx_irq_status,
+
+	TP_PROTO(struct adreno_device *adreno_dev, unsigned int status),
+
+	TP_ARGS(adreno_dev, status),
+
+	TP_STRUCT__entry(
+		__string(device_name, adreno_dev->dev.name)
+		__field(unsigned int, status)
+	),
+
+	TP_fast_assign(
+		__assign_str(device_name, adreno_dev->dev.name);
+		__entry->status = status;
+	),
+
+	TP_printk(
+		"d_name=%s status=%s",
+		__get_str(device_name),
+		__entry->status ? __print_flags(__entry->status, "|",
+			A3XX_IRQ_FLAGS) : "None"
+	)
+);
+
+/*
+ * Tracepoint for a4xx irq. Includes status info
+ */
+TRACE_EVENT(kgsl_a4xx_irq_status,
+
+	TP_PROTO(struct adreno_device *adreno_dev, unsigned int status),
+
+	TP_ARGS(adreno_dev, status),
+
+	TP_STRUCT__entry(
+		__string(device_name, adreno_dev->dev.name)
+		__field(unsigned int, status)
+	),
+
+	TP_fast_assign(
+		__assign_str(device_name, adreno_dev->dev.name);
+		__entry->status = status;
+	),
+
+	TP_printk(
+		"d_name=%s status=%s",
+		__get_str(device_name),
+		__entry->status ? __print_flags(__entry->status, "|",
+			A4XX_IRQ_FLAGS) : "None"
+	)
+);
+
+/*
+ * Tracepoint for a5xx irq. Includes status info
+ */
+TRACE_EVENT(kgsl_a5xx_irq_status,
+
+	TP_PROTO(struct adreno_device *adreno_dev, unsigned int status),
+
+	TP_ARGS(adreno_dev, status),
+
+	TP_STRUCT__entry(
+		__string(device_name, adreno_dev->dev.name)
+		__field(unsigned int, status)
+	),
+
+	TP_fast_assign(
+		__assign_str(device_name, adreno_dev->dev.name);
+		__entry->status = status;
+	),
+
+	TP_printk(
+		"d_name=%s status=%s",
+		__get_str(device_name),
+		__entry->status ? __print_flags(__entry->status, "|",
+			A5XX_IRQ_FLAGS) : "None"
+	)
+);
+
+DECLARE_EVENT_CLASS(adreno_hw_preempt_template,
+	TP_PROTO(struct adreno_ringbuffer *cur_rb,
+		struct adreno_ringbuffer *new_rb,
+		unsigned int cur_rptr, unsigned int new_rptr),
+	TP_ARGS(cur_rb, new_rb, cur_rptr, new_rptr),
+	TP_STRUCT__entry(__field(int, cur_level)
+			__field(int, new_level)
+			__field(unsigned int, cur_rptr)
+			__field(unsigned int, new_rptr)
+			__field(unsigned int, cur_wptr)
+			__field(unsigned int, new_wptr)
+			__field(unsigned int, cur_rbbase)
+			__field(unsigned int, new_rbbase)
+	),
+	TP_fast_assign(__entry->cur_level = cur_rb->id;
+			__entry->new_level = new_rb->id;
+			__entry->cur_rptr = cur_rptr;
+			__entry->new_rptr = new_rptr;
+			__entry->cur_wptr = cur_rb->wptr;
+			__entry->new_wptr = new_rb->wptr;
+			__entry->cur_rbbase = cur_rb->buffer_desc.gpuaddr;
+			__entry->new_rbbase = new_rb->buffer_desc.gpuaddr;
+	),
+	TP_printk(
+	"cur_rb_lvl=%d rptr=%x wptr=%x rbbase=%x new_rb_lvl=%d rptr=%x wptr=%x rbbase=%x",
+		__entry->cur_level, __entry->cur_rptr,
+		__entry->cur_wptr, __entry->cur_rbbase,
+		__entry->new_level, __entry->new_rptr,
+		__entry->new_wptr, __entry->new_rbbase
+	)
+);
+
+DEFINE_EVENT(adreno_hw_preempt_template, adreno_hw_preempt_clear_to_trig,
+	TP_PROTO(struct adreno_ringbuffer *cur_rb,
+		struct adreno_ringbuffer *new_rb,
+		unsigned int cur_rptr, unsigned int new_rptr),
+	TP_ARGS(cur_rb, new_rb, cur_rptr, new_rptr)
+);
+
+DEFINE_EVENT(adreno_hw_preempt_template, adreno_hw_preempt_trig_to_comp,
+	TP_PROTO(struct adreno_ringbuffer *cur_rb,
+		struct adreno_ringbuffer *new_rb,
+		unsigned int cur_rptr, unsigned int new_rptr),
+	TP_ARGS(cur_rb, new_rb, cur_rptr, new_rptr)
+);
+
+DEFINE_EVENT(adreno_hw_preempt_template, adreno_hw_preempt_trig_to_comp_int,
+	TP_PROTO(struct adreno_ringbuffer *cur_rb,
+		struct adreno_ringbuffer *new_rb,
+		unsigned int cur_rptr, unsigned int new_rptr),
+	TP_ARGS(cur_rb, new_rb, cur_rptr, new_rptr)
+);
+
+TRACE_EVENT(adreno_hw_preempt_comp_to_clear,
+	TP_PROTO(struct adreno_ringbuffer *cur_rb,
+		struct adreno_ringbuffer *new_rb,
+		unsigned int cur_rptr, unsigned int new_rptr),
+	TP_ARGS(cur_rb, new_rb, cur_rptr, new_rptr),
+	TP_STRUCT__entry(__field(int, cur_level)
+			__field(int, new_level)
+			__field(unsigned int, cur_rptr)
+			__field(unsigned int, new_rptr)
+			__field(unsigned int, cur_wptr)
+			__field(unsigned int, new_wptr_end)
+			__field(unsigned int, new_wptr)
+			__field(unsigned int, cur_rbbase)
+			__field(unsigned int, new_rbbase)
+	),
+	TP_fast_assign(__entry->cur_level = cur_rb->id;
+			__entry->new_level = new_rb->id;
+			__entry->cur_rptr = cur_rptr;
+			__entry->new_rptr = new_rptr;
+			__entry->cur_wptr = cur_rb->wptr;
+			__entry->new_wptr_end = new_rb->wptr_preempt_end;
+			__entry->new_wptr = new_rb->wptr;
+			__entry->cur_rbbase = cur_rb->buffer_desc.gpuaddr;
+			__entry->new_rbbase = new_rb->buffer_desc.gpuaddr;
+	),
+	TP_printk(
+	"cur_rb_lvl=%d rptr=%x wptr=%x rbbase=%x prev_rb_lvl=%d rptr=%x wptr_preempt_end=%x wptr=%x rbbase=%x",
+		__entry->cur_level, __entry->cur_rptr,
+		__entry->cur_wptr, __entry->cur_rbbase,
+		__entry->new_level, __entry->new_rptr,
+		__entry->new_wptr_end, __entry->new_wptr, __entry->new_rbbase
+	)
+);
+
+TRACE_EVENT(adreno_hw_preempt_token_submit,
+	TP_PROTO(struct adreno_ringbuffer *cur_rb,
+		struct adreno_ringbuffer *new_rb,
+		unsigned int cur_rptr, unsigned int new_rptr),
+	TP_ARGS(cur_rb, new_rb, cur_rptr, new_rptr),
+	TP_STRUCT__entry(__field(int, cur_level)
+		__field(int, new_level)
+		__field(unsigned int, cur_rptr)
+		__field(unsigned int, new_rptr)
+		__field(unsigned int, cur_wptr)
+		__field(unsigned int, cur_wptr_end)
+		__field(unsigned int, new_wptr)
+		__field(unsigned int, cur_rbbase)
+		__field(unsigned int, new_rbbase)
+	),
+	TP_fast_assign(__entry->cur_level = cur_rb->id;
+			__entry->new_level = new_rb->id;
+			__entry->cur_rptr = cur_rptr;
+			__entry->new_rptr = new_rptr;
+			__entry->cur_wptr = cur_rb->wptr;
+			__entry->cur_wptr_end = cur_rb->wptr_preempt_end;
+			__entry->new_wptr = new_rb->wptr;
+			__entry->cur_rbbase = cur_rb->buffer_desc.gpuaddr;
+			__entry->new_rbbase = new_rb->buffer_desc.gpuaddr;
+	),
+	TP_printk(
+		"cur_rb_lvl=%d rptr=%x wptr_preempt_end=%x wptr=%x rbbase=%x new_rb_lvl=%d rptr=%x wptr=%x rbbase=%x",
+		__entry->cur_level, __entry->cur_rptr,
+		__entry->cur_wptr_end, __entry->cur_wptr,
+		__entry->cur_rbbase,
+		__entry->new_level, __entry->new_rptr,
+		__entry->new_wptr, __entry->new_rbbase
+	)
+);
+
+TRACE_EVENT(adreno_preempt_trigger,
+	TP_PROTO(struct adreno_ringbuffer *cur, struct adreno_ringbuffer *next),
+	TP_ARGS(cur, next),
+	TP_STRUCT__entry(
+		__field(struct adreno_ringbuffer *, cur)
+		__field(struct adreno_ringbuffer *, next)
+	),
+	TP_fast_assign(
+		__entry->cur = cur;
+		__entry->next = next;
+	),
+	TP_printk("trigger from id=%d to id=%d",
+		__entry->cur->id, __entry->next->id
+	)
+);
+
+TRACE_EVENT(adreno_preempt_done,
+	TP_PROTO(struct adreno_ringbuffer *cur, struct adreno_ringbuffer *next),
+	TP_ARGS(cur, next),
+	TP_STRUCT__entry(
+		__field(struct adreno_ringbuffer *, cur)
+		__field(struct adreno_ringbuffer *, next)
+	),
+	TP_fast_assign(
+		__entry->cur = cur;
+		__entry->next = next;
+	),
+	TP_printk("done switch to id=%d from id=%d",
+		__entry->next->id, __entry->cur->id
+	)
+);
+#endif /* _ADRENO_TRACE_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/drivers/gpu/msm/kgsl.c b/drivers/gpu/msm/kgsl.c
new file mode 100644
index 0000000..e50442a
--- /dev/null
+++ b/drivers/gpu/msm/kgsl.c
@@ -0,0 +1,4704 @@
+/* Copyright (c) 2008-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/module.h>
+#include <linux/fb.h>
+#include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/fdtable.h>
+#include <linux/list.h>
+#include <linux/debugfs.h>
+#include <linux/uaccess.h>
+#include <linux/interrupt.h>
+#include <linux/workqueue.h>
+#include <linux/dma-buf.h>
+#include <linux/pm_runtime.h>
+#include <linux/rbtree.h>
+#include <linux/major.h>
+#include <linux/io.h>
+#include <linux/mman.h>
+#include <linux/sort.h>
+#include <linux/security.h>
+#include <linux/compat.h>
+#include <linux/ctype.h>
+#include <linux/mm.h>
+#include <asm/cacheflush.h>
+
+#include "kgsl.h"
+#include "kgsl_debugfs.h"
+#include "kgsl_log.h"
+#include "kgsl_sharedmem.h"
+#include "kgsl_drawobj.h"
+#include "kgsl_device.h"
+#include "kgsl_trace.h"
+#include "kgsl_sync.h"
+#include "kgsl_compat.h"
+#include "kgsl_pool.h"
+
+#undef MODULE_PARAM_PREFIX
+#define MODULE_PARAM_PREFIX "kgsl."
+
+#ifndef arch_mmap_check
+#define arch_mmap_check(addr, len, flags)	(0)
+#endif
+
+#ifndef pgprot_writebackcache
+#define pgprot_writebackcache(_prot)	(_prot)
+#endif
+
+#ifndef pgprot_writethroughcache
+#define pgprot_writethroughcache(_prot)	(_prot)
+#endif
+
+#ifdef CONFIG_ARM_LPAE
+#define KGSL_DMA_BIT_MASK	DMA_BIT_MASK(64)
+#else
+#define KGSL_DMA_BIT_MASK	DMA_BIT_MASK(32)
+#endif
+
+static char *kgsl_mmu_type;
+module_param_named(mmutype, kgsl_mmu_type, charp, 0000);
+MODULE_PARM_DESC(mmutype, "Type of MMU to be used for graphics");
+
+/* Mutex used for the IOMMU sync quirk */
+DEFINE_MUTEX(kgsl_mmu_sync);
+EXPORT_SYMBOL(kgsl_mmu_sync);
+
+struct kgsl_dma_buf_meta {
+	struct dma_buf_attachment *attach;
+	struct dma_buf *dmabuf;
+	struct sg_table *table;
+};
+
+static inline struct kgsl_pagetable *_get_memdesc_pagetable(
+		struct kgsl_pagetable *pt, struct kgsl_mem_entry *entry)
+{
+	/* if a secured buffer, map it to secure global pagetable */
+	if (kgsl_memdesc_is_secured(&entry->memdesc))
+		return pt->mmu->securepagetable;
+
+	return pt;
+}
+
+static void kgsl_mem_entry_detach_process(struct kgsl_mem_entry *entry);
+
+static const struct file_operations kgsl_fops;
+
+/*
+ * The memfree list contains the last N blocks of memory that have been freed.
+ * On a GPU fault we walk the list to see if the faulting address had been
+ * recently freed and print out a message to that effect
+ */
+
+#define MEMFREE_ENTRIES 512
+
+static DEFINE_SPINLOCK(memfree_lock);
+
+struct memfree_entry {
+	pid_t ptname;
+	uint64_t gpuaddr;
+	uint64_t size;
+	pid_t pid;
+	uint64_t flags;
+};
+
+static struct {
+	struct memfree_entry *list;
+	int head;
+	int tail;
+} memfree;
+
+static int kgsl_memfree_init(void)
+{
+	memfree.list = kcalloc(MEMFREE_ENTRIES, sizeof(struct memfree_entry),
+		GFP_KERNEL);
+
+	return (memfree.list) ? 0 : -ENOMEM;
+}
+
+static void kgsl_memfree_exit(void)
+{
+	kfree(memfree.list);
+	memset(&memfree, 0, sizeof(memfree));
+}
+
+static inline bool match_memfree_addr(struct memfree_entry *entry,
+		pid_t ptname, uint64_t gpuaddr)
+{
+	return ((entry->ptname == ptname) &&
+		(entry->size > 0) &&
+		(gpuaddr >= entry->gpuaddr &&
+			 gpuaddr < (entry->gpuaddr + entry->size)));
+}
+int kgsl_memfree_find_entry(pid_t ptname, uint64_t *gpuaddr,
+	uint64_t *size, uint64_t *flags, pid_t *pid)
+{
+	int ptr;
+
+	if (memfree.list == NULL)
+		return 0;
+
+	spin_lock(&memfree_lock);
+
+	ptr = memfree.head - 1;
+	if (ptr < 0)
+		ptr = MEMFREE_ENTRIES - 1;
+
+	/* Walk backwards through the list looking for the last match  */
+	while (ptr != memfree.tail) {
+		struct memfree_entry *entry = &memfree.list[ptr];
+
+		if (match_memfree_addr(entry, ptname, *gpuaddr)) {
+			*gpuaddr = entry->gpuaddr;
+			*flags = entry->flags;
+			*size = entry->size;
+			*pid = entry->pid;
+
+			spin_unlock(&memfree_lock);
+			return 1;
+		}
+
+		ptr = ptr - 1;
+
+		if (ptr < 0)
+			ptr = MEMFREE_ENTRIES - 1;
+	}
+
+	spin_unlock(&memfree_lock);
+	return 0;
+}
+
+static void kgsl_memfree_purge(struct kgsl_pagetable *pagetable,
+		uint64_t gpuaddr, uint64_t size)
+{
+	pid_t ptname = pagetable ? pagetable->name : 0;
+	int i;
+
+	if (memfree.list == NULL)
+		return;
+
+	spin_lock(&memfree_lock);
+
+	for (i = 0; i < MEMFREE_ENTRIES; i++) {
+		struct memfree_entry *entry = &memfree.list[i];
+
+		if (entry->ptname != ptname || entry->size == 0)
+			continue;
+
+		if (gpuaddr > entry->gpuaddr &&
+			gpuaddr < entry->gpuaddr + entry->size) {
+			/* truncate the end of the entry */
+			entry->size = gpuaddr - entry->gpuaddr;
+		} else if (gpuaddr <= entry->gpuaddr) {
+			if (gpuaddr + size > entry->gpuaddr &&
+				gpuaddr + size < entry->gpuaddr + entry->size) {
+				/* Truncate the beginning of the entry */
+				entry->size -= (gpuaddr + size) - entry->gpuaddr;
+				entry->gpuaddr = gpuaddr + size;
+			} else if (gpuaddr + size >= entry->gpuaddr + entry->size) {
+				/* Remove the entire entry */
+				entry->size = 0;
+			}
+		}
+	}
+	spin_unlock(&memfree_lock);
+}
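+
+/*
+ * Worked example of the purge cases above (illustrative only): assume a
+ * recorded entry covering [0x1000, 0x3000).
+ *
+ *	purge [0x2000, 0x4000)	-> entry truncated to [0x1000, 0x2000)
+ *	purge [0x0800, 0x1800)	-> entry truncated to [0x1800, 0x3000)
+ *	purge [0x0800, 0x3800)	-> entry removed (size set to 0)
+ *
+ * This keeps the memfree log from reporting ranges that have since been
+ * handed back out to a new allocation.
+ */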
+
+static void kgsl_memfree_add(pid_t pid, pid_t ptname, uint64_t gpuaddr,
+		uint64_t size, uint64_t flags)
+
+{
+	struct memfree_entry *entry;
+
+	if (memfree.list == NULL)
+		return;
+
+	spin_lock(&memfree_lock);
+
+	entry = &memfree.list[memfree.head];
+
+	entry->pid = pid;
+	entry->ptname = ptname;
+	entry->gpuaddr = gpuaddr;
+	entry->size = size;
+	entry->flags = flags;
+
+	memfree.head = (memfree.head + 1) % MEMFREE_ENTRIES;
+
+	if (memfree.head == memfree.tail)
+		memfree.tail = (memfree.tail + 1) % MEMFREE_ENTRIES;
+
+	spin_unlock(&memfree_lock);
+}
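+
+/*
+ * Usage sketch for the memfree log (illustrative only; fault_addr and the
+ * local variable names are hypothetical): record a range when it is freed,
+ * then on a pagefault ask whether the faulting address falls inside a
+ * recently freed range.  Note that *gpuaddr is both an input (the fault
+ * address) and an output (the start of the matching freed range).
+ *
+ *	kgsl_memfree_add(pid, ptname, gpuaddr, size, flags);
+ *	...
+ *	uint64_t addr = fault_addr, fsize = 0, fflags = 0;
+ *	pid_t fpid = 0;
+ *
+ *	if (kgsl_memfree_find_entry(ptname, &addr, &fsize, &fflags, &fpid))
+ *		pr_err("fault in freed range [0x%llx, 0x%llx) freed by pid %d\n",
+ *			addr, addr + fsize, fpid);
+ */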
+
+int kgsl_readtimestamp(struct kgsl_device *device, void *priv,
+		enum kgsl_timestamp_type type, unsigned int *timestamp)
+{
+	return device->ftbl->readtimestamp(device, priv, type, timestamp);
+}
+EXPORT_SYMBOL(kgsl_readtimestamp);
+
+static long gpumem_free_entry(struct kgsl_mem_entry *entry);
+
+/* Scheduled by kgsl_mem_entry_put_deferred() */
+static void _deferred_put(struct work_struct *work)
+{
+	struct kgsl_mem_entry *entry =
+		container_of(work, struct kgsl_mem_entry, work);
+
+	kgsl_mem_entry_put(entry);
+}
+
+static inline struct kgsl_mem_entry *
+kgsl_mem_entry_create(void)
+{
+	struct kgsl_mem_entry *entry = kzalloc(sizeof(*entry), GFP_KERNEL);
+
+	if (entry != NULL)
+		kref_init(&entry->refcount);
+
+	return entry;
+}
+#ifdef CONFIG_DMA_SHARED_BUFFER
+static void kgsl_destroy_ion(struct kgsl_dma_buf_meta *meta)
+{
+	if (meta != NULL) {
+		dma_buf_unmap_attachment(meta->attach, meta->table,
+			DMA_FROM_DEVICE);
+		dma_buf_detach(meta->dmabuf, meta->attach);
+		dma_buf_put(meta->dmabuf);
+		kfree(meta);
+	}
+}
+#else
+static void kgsl_destroy_ion(struct kgsl_dma_buf_meta *meta)
+{
+
+}
+#endif
+
+void
+kgsl_mem_entry_destroy(struct kref *kref)
+{
+	struct kgsl_mem_entry *entry = container_of(kref,
+						    struct kgsl_mem_entry,
+						    refcount);
+	unsigned int memtype;
+
+	if (entry == NULL)
+		return;
+
+	/* pull out the memtype before the flags get cleared */
+	memtype = kgsl_memdesc_usermem_type(&entry->memdesc);
+
+	/* Detach from process list */
+	kgsl_mem_entry_detach_process(entry);
+
+	if (memtype != KGSL_MEM_ENTRY_KERNEL)
+		atomic_long_sub(entry->memdesc.size,
+			&kgsl_driver.stats.mapped);
+
+	/*
+	 * Ion takes care of freeing the sg_table for us, so clear the
+	 * sg_table before freeing the sharedmem so that kgsl_sharedmem_free
+	 * doesn't try to free it again
+	 */
+	if (memtype == KGSL_MEM_ENTRY_ION)
+		entry->memdesc.sgt = NULL;
+
+	if ((memtype == KGSL_MEM_ENTRY_USER)
+		&& !(entry->memdesc.flags & KGSL_MEMFLAGS_GPUREADONLY)) {
+		int i = 0, j;
+		struct scatterlist *sg;
+		struct page *page;
+		/*
+		 * Mark all of the pages in the scatterlist as dirty since they
+		 * were writable by the GPU.
+		 */
+		for_each_sg(entry->memdesc.sgt->sgl, sg,
+			    entry->memdesc.sgt->nents, i) {
+			page = sg_page(sg);
+			for (j = 0; j < (sg->length >> PAGE_SHIFT); j++)
+				set_page_dirty(nth_page(page, j));
+		}
+	}
+
+	kgsl_sharedmem_free(&entry->memdesc);
+
+	switch (memtype) {
+	case KGSL_MEM_ENTRY_ION:
+		kgsl_destroy_ion(entry->priv_data);
+		break;
+	default:
+		break;
+	}
+
+	kfree(entry);
+}
+EXPORT_SYMBOL(kgsl_mem_entry_destroy);
+
+/* Allocate an IOVA for memory objects that don't use SVM */
+static int kgsl_mem_entry_track_gpuaddr(struct kgsl_device *device,
+		struct kgsl_process_private *process,
+		struct kgsl_mem_entry *entry)
+{
+	struct kgsl_pagetable *pagetable;
+
+	/*
+	 * If SVM is enabled for this object then the address needs to be
+	 * assigned elsewhere.
+	 * Also do not proceed further in case of NoMMU.
+	 */
+	if (kgsl_memdesc_use_cpu_map(&entry->memdesc) ||
+		(kgsl_mmu_get_mmutype(device) == KGSL_MMU_TYPE_NONE))
+		return 0;
+
+	pagetable = kgsl_memdesc_is_secured(&entry->memdesc) ?
+		device->mmu.securepagetable : process->pagetable;
+
+	return kgsl_mmu_get_gpuaddr(pagetable, &entry->memdesc);
+}
+
+/* Commit the entry to the process so it can be accessed by other operations */
+static void kgsl_mem_entry_commit_process(struct kgsl_mem_entry *entry)
+{
+	if (!entry)
+		return;
+
+	spin_lock(&entry->priv->mem_lock);
+	idr_replace(&entry->priv->mem_idr, entry, entry->id);
+	spin_unlock(&entry->priv->mem_lock);
+}
+
+/*
+ * Attach the memory object to a process by (possibly) getting a GPU address and
+ * (possibly) mapping it
+ */
+static int kgsl_mem_entry_attach_process(struct kgsl_device *device,
+		struct kgsl_process_private *process,
+		struct kgsl_mem_entry *entry)
+{
+	int id, ret;
+
+	ret = kgsl_process_private_get(process);
+	if (!ret)
+		return -EBADF;
+
+	ret = kgsl_mem_entry_track_gpuaddr(device, process, entry);
+	if (ret) {
+		kgsl_process_private_put(process);
+		return ret;
+	}
+
+	idr_preload(GFP_KERNEL);
+	spin_lock(&process->mem_lock);
+	/* Allocate the ID but don't attach the pointer just yet */
+	id = idr_alloc(&process->mem_idr, NULL, 1, 0, GFP_NOWAIT);
+	spin_unlock(&process->mem_lock);
+	idr_preload_end();
+
+	if (id < 0) {
+		if (!kgsl_memdesc_use_cpu_map(&entry->memdesc))
+			kgsl_mmu_put_gpuaddr(&entry->memdesc);
+		kgsl_process_private_put(process);
+		return id;
+	}
+
+	entry->id = id;
+	entry->priv = process;
+
+	/*
+	 * Map the memory if a GPU address is already assigned, either through
+	 * kgsl_mem_entry_track_gpuaddr() or via some other SVM process
+	 */
+	if (entry->memdesc.gpuaddr) {
+		if (entry->memdesc.flags & KGSL_MEMFLAGS_SPARSE_VIRT)
+			ret = kgsl_mmu_sparse_dummy_map(
+					entry->memdesc.pagetable,
+					&entry->memdesc, 0,
+					entry->memdesc.size);
+		else
+			ret = kgsl_mmu_map(entry->memdesc.pagetable,
+					&entry->memdesc);
+
+		if (ret)
+			kgsl_mem_entry_detach_process(entry);
+	}
+
+	kgsl_memfree_purge(entry->memdesc.pagetable, entry->memdesc.gpuaddr,
+		entry->memdesc.size);
+
+	return ret;
+}
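+
+/*
+ * A minimal sketch of the two-phase idr publish pattern used by
+ * kgsl_mem_entry_attach_process() and kgsl_mem_entry_commit_process() above
+ * (illustrative; the lock, idr and object names are generic):
+ *
+ *	idr_preload(GFP_KERNEL);
+ *	spin_lock(&lock);
+ *	id = idr_alloc(&idr, NULL, 1, 0, GFP_NOWAIT);	(reserve the id)
+ *	spin_unlock(&lock);
+ *	idr_preload_end();
+ *
+ *	... finish setting up the object ...
+ *
+ *	spin_lock(&lock);
+ *	idr_replace(&idr, obj, id);			(make it visible)
+ *	spin_unlock(&lock);
+ *
+ * Lookups that run in between see a NULL pointer for the reserved id and
+ * skip it, so a half-initialized entry is never handed out.
+ */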
+
+/* Detach a memory entry from a process and unmap it from the MMU */
+static void kgsl_mem_entry_detach_process(struct kgsl_mem_entry *entry)
+{
+	unsigned int type;
+
+	if (entry == NULL)
+		return;
+
+	/*
+	 * First remove the entry from the mem_idr list
+	 * so that no one can operate on obsolete values
+	 */
+	spin_lock(&entry->priv->mem_lock);
+	if (entry->id != 0)
+		idr_remove(&entry->priv->mem_idr, entry->id);
+	entry->id = 0;
+
+	type = kgsl_memdesc_usermem_type(&entry->memdesc);
+	entry->priv->stats[type].cur -= entry->memdesc.size;
+	spin_unlock(&entry->priv->mem_lock);
+
+	kgsl_mmu_put_gpuaddr(&entry->memdesc);
+
+	kgsl_process_private_put(entry->priv);
+
+	entry->priv = NULL;
+}
+
+/**
+ * kgsl_context_dump() - dump information about a draw context
+ * @context: KGSL context to dump information about
+ *
+ * Dump specific information about the context to the kernel log.  Used for
+ * fence timeout callbacks.
+ */
+void kgsl_context_dump(struct kgsl_context *context)
+{
+	struct kgsl_device *device;
+
+	if (_kgsl_context_get(context) == 0)
+		return;
+
+	device = context->device;
+
+	if (kgsl_context_detached(context)) {
+		dev_err(device->dev, "  context[%d]: context detached\n",
+			context->id);
+	} else if (device->ftbl->drawctxt_dump != NULL)
+		device->ftbl->drawctxt_dump(device, context);
+
+	kgsl_context_put(context);
+}
+EXPORT_SYMBOL(kgsl_context_dump);
+
+/* Allocate a new context ID */
+static int _kgsl_get_context_id(struct kgsl_device *device)
+{
+	int id;
+
+	idr_preload(GFP_KERNEL);
+	write_lock(&device->context_lock);
+	/* Allocate the slot but don't put a pointer in it yet */
+	id = idr_alloc(&device->context_idr, NULL, 1,
+		KGSL_MEMSTORE_MAX, GFP_NOWAIT);
+	write_unlock(&device->context_lock);
+	idr_preload_end();
+
+	return id;
+}
+
+/**
+ * kgsl_context_init() - helper to initialize kgsl_context members
+ * @dev_priv: the owner of the context
+ * @context: the newly created context struct, should be allocated by
+ * the device specific drawctxt_create function.
+ *
+ * This is a helper function for the device specific drawctxt_create
+ * function to initialize the common members of its context struct.
+ * If this function succeeds, reference counting is active in the context
+ * struct and the caller should use kgsl_context_put() to release it on error.
+ * If it fails, the caller should just free the context structure
+ * it passed in.
+ */
+int kgsl_context_init(struct kgsl_device_private *dev_priv,
+			struct kgsl_context *context)
+{
+	struct kgsl_device *device = dev_priv->device;
+	char name[64];
+	int ret = 0, id;
+
+	id = _kgsl_get_context_id(device);
+	if (id == -ENOSPC) {
+		/*
+		 * Before declaring that there are no contexts left, try
+		 * flushing the event workqueue just in case there are
+		 * detached contexts waiting to finish
+		 */
+
+		flush_workqueue(device->events_wq);
+		id = _kgsl_get_context_id(device);
+	}
+
+	if (id < 0) {
+		if (id == -ENOSPC)
+			KGSL_DRV_INFO(device,
+				"cannot have more than %zu contexts due to memstore limitation\n",
+				KGSL_MEMSTORE_MAX);
+
+		return id;
+	}
+
+	context->id = id;
+
+	kref_init(&context->refcount);
+	/*
+	 * Get a reference to the process private so it's not destroyed until
+	 * the context is destroyed. This will also prevent the pagetable
+	 * from being destroyed
+	 */
+	if (!kgsl_process_private_get(dev_priv->process_priv)) {
+		ret = -EBADF;
+		goto out;
+	}
+	context->device = dev_priv->device;
+	context->dev_priv = dev_priv;
+	context->proc_priv = dev_priv->process_priv;
+	context->tid = task_pid_nr(current);
+
+	ret = kgsl_sync_timeline_create(context);
+	if (ret)
+		goto out;
+
+	snprintf(name, sizeof(name), "context-%d", id);
+	kgsl_add_event_group(&context->events, context, name,
+		kgsl_readtimestamp, context);
+
+out:
+	if (ret) {
+		write_lock(&device->context_lock);
+		idr_remove(&dev_priv->device->context_idr, id);
+		write_unlock(&device->context_lock);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(kgsl_context_init);
+
+/**
+ * kgsl_context_detach() - Release the "master" context reference
+ * @context: The context that will be detached
+ *
+ * This is called when a context becomes unusable, because userspace
+ * has requested that it be destroyed. The context itself may
+ * exist a bit longer until its reference count goes to zero.
+ * Other code referencing the context can detect that it has been
+ * detached by checking the KGSL_CONTEXT_PRIV_DETACHED bit in
+ * context->priv.
+ */
+static void kgsl_context_detach(struct kgsl_context *context)
+{
+	struct kgsl_device *device;
+
+	if (context == NULL)
+		return;
+
+	/*
+	 * Mark the context as detached to keep others from using
+	 * the context before it gets fully removed, and to make sure
+	 * we don't try to detach twice.
+	 */
+	if (test_and_set_bit(KGSL_CONTEXT_PRIV_DETACHED, &context->priv))
+		return;
+
+	device = context->device;
+
+	trace_kgsl_context_detach(device, context);
+
+	context->device->ftbl->drawctxt_detach(context);
+
+	/*
+	 * Cancel all pending events after the device-specific context is
+	 * detached, to avoid possibly freeing memory while it is still
+	 * in use by the GPU.
+	 */
+	kgsl_cancel_events(device, &context->events);
+
+	/* Remove the event group from the list */
+	kgsl_del_event_group(&context->events);
+
+	kgsl_context_put(context);
+}
+
+void
+kgsl_context_destroy(struct kref *kref)
+{
+	struct kgsl_context *context = container_of(kref, struct kgsl_context,
+						    refcount);
+	struct kgsl_device *device = context->device;
+
+	trace_kgsl_context_destroy(device, context);
+
+	/*
+	 * It's not safe to destroy the context if it's not detached, as the
+	 * GPU may still be executing commands
+	 */
+	BUG_ON(!kgsl_context_detached(context));
+
+	write_lock(&device->context_lock);
+	if (context->id != KGSL_CONTEXT_INVALID) {
+
+		/* Clear the timestamps in the memstore during destroy */
+		kgsl_sharedmem_writel(device, &device->memstore,
+			KGSL_MEMSTORE_OFFSET(context->id, soptimestamp), 0);
+		kgsl_sharedmem_writel(device, &device->memstore,
+			KGSL_MEMSTORE_OFFSET(context->id, eoptimestamp), 0);
+
+		/* clear device power constraint */
+		if (context->id == device->pwrctrl.constraint.owner_id) {
+			trace_kgsl_constraint(device,
+				device->pwrctrl.constraint.type,
+				device->pwrctrl.active_pwrlevel,
+				0);
+			device->pwrctrl.constraint.type = KGSL_CONSTRAINT_NONE;
+		}
+
+		idr_remove(&device->context_idr, context->id);
+		context->id = KGSL_CONTEXT_INVALID;
+	}
+	write_unlock(&device->context_lock);
+	kgsl_sync_timeline_destroy(context);
+	kgsl_process_private_put(context->proc_priv);
+
+	device->ftbl->drawctxt_destroy(context);
+}
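+
+/*
+ * Reference handling sketch for contexts (illustrative): kgsl_context_detach()
+ * drops the "master" reference taken at create time, while destruction only
+ * runs once the last reference is gone.  Code that needs to use a context it
+ * does not already hold a reference to follows this pattern, as
+ * kgsl_context_dump() above demonstrates:
+ *
+ *	if (_kgsl_context_get(context)) {
+ *		... safe to use the context here ...
+ *		kgsl_context_put(context);
+ *	}
+ *
+ * kgsl_context_put() invokes kgsl_context_destroy() through the kref when the
+ * count hits zero.
+ */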
+
+struct kgsl_device *kgsl_get_device(int dev_idx)
+{
+	int i;
+	struct kgsl_device *ret = NULL;
+
+	mutex_lock(&kgsl_driver.devlock);
+
+	for (i = 0; i < KGSL_DEVICE_MAX; i++) {
+		if (kgsl_driver.devp[i] && kgsl_driver.devp[i]->id == dev_idx) {
+			ret = kgsl_driver.devp[i];
+			break;
+		}
+	}
+
+	mutex_unlock(&kgsl_driver.devlock);
+	return ret;
+}
+EXPORT_SYMBOL(kgsl_get_device);
+
+static struct kgsl_device *kgsl_get_minor(int minor)
+{
+	struct kgsl_device *ret = NULL;
+
+	if (minor < 0 || minor >= KGSL_DEVICE_MAX)
+		return NULL;
+
+	mutex_lock(&kgsl_driver.devlock);
+	ret = kgsl_driver.devp[minor];
+	mutex_unlock(&kgsl_driver.devlock);
+
+	return ret;
+}
+
+/**
+ * kgsl_check_timestamp() - return true if the specified timestamp is retired
+ * @device: Pointer to the KGSL device to check
+ * @context: Pointer to the context for the timestamp
+ * @timestamp: The timestamp to compare
+ */
+int kgsl_check_timestamp(struct kgsl_device *device,
+	struct kgsl_context *context, unsigned int timestamp)
+{
+	unsigned int ts_processed;
+
+	kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED,
+		&ts_processed);
+
+	return (timestamp_cmp(ts_processed, timestamp) >= 0);
+}
+EXPORT_SYMBOL(kgsl_check_timestamp);
+
+static int kgsl_suspend_device(struct kgsl_device *device, pm_message_t state)
+{
+	int status = -EINVAL;
+
+	if (!device)
+		return -EINVAL;
+
+	KGSL_PWR_WARN(device, "suspend start\n");
+
+	mutex_lock(&device->mutex);
+	status = kgsl_pwrctrl_change_state(device, KGSL_STATE_SUSPEND);
+	mutex_unlock(&device->mutex);
+
+	KGSL_PWR_WARN(device, "suspend end\n");
+	return status;
+}
+
+static int kgsl_resume_device(struct kgsl_device *device)
+{
+	if (!device)
+		return -EINVAL;
+
+	KGSL_PWR_WARN(device, "resume start\n");
+	mutex_lock(&device->mutex);
+	if (device->state == KGSL_STATE_SUSPEND) {
+		kgsl_pwrctrl_change_state(device, KGSL_STATE_SLUMBER);
+	} else if (device->state != KGSL_STATE_INIT) {
+		/*
+		 * This is an error situation, so wait for the device
+		 * to idle and then put the device into the SLUMBER state.
+		 * This will put the device in the right state when
+		 * we resume.
+		 */
+		if (device->state == KGSL_STATE_ACTIVE)
+			device->ftbl->idle(device);
+		kgsl_pwrctrl_change_state(device, KGSL_STATE_SLUMBER);
+		KGSL_PWR_ERR(device,
+			"resume invoked without a suspend\n");
+	}
+
+	mutex_unlock(&device->mutex);
+	KGSL_PWR_WARN(device, "resume end\n");
+	return 0;
+}
+
+static int kgsl_suspend(struct device *dev)
+{
+	pm_message_t arg = {0};
+	struct kgsl_device *device = dev_get_drvdata(dev);
+
+	return kgsl_suspend_device(device, arg);
+}
+
+static int kgsl_resume(struct device *dev)
+{
+	struct kgsl_device *device = dev_get_drvdata(dev);
+
+	return kgsl_resume_device(device);
+}
+
+static int kgsl_runtime_suspend(struct device *dev)
+{
+	return 0;
+}
+
+static int kgsl_runtime_resume(struct device *dev)
+{
+	return 0;
+}
+
+const struct dev_pm_ops kgsl_pm_ops = {
+	.suspend = kgsl_suspend,
+	.resume = kgsl_resume,
+	.runtime_suspend = kgsl_runtime_suspend,
+	.runtime_resume = kgsl_runtime_resume,
+};
+EXPORT_SYMBOL(kgsl_pm_ops);
+
+int kgsl_suspend_driver(struct platform_device *pdev,
+					pm_message_t state)
+{
+	struct kgsl_device *device = dev_get_drvdata(&pdev->dev);
+
+	return kgsl_suspend_device(device, state);
+}
+EXPORT_SYMBOL(kgsl_suspend_driver);
+
+int kgsl_resume_driver(struct platform_device *pdev)
+{
+	struct kgsl_device *device = dev_get_drvdata(&pdev->dev);
+
+	return kgsl_resume_device(device);
+}
+EXPORT_SYMBOL(kgsl_resume_driver);
+
+/**
+ * kgsl_destroy_process_private() - Cleanup function to free process private
+ * @kref: Pointer to the kref struct of the object being destroyed
+ *
+ * Free the process private struct and all other resources attached to it.
+ * Since the function can be called before all of the resources inside the
+ * process private have been allocated, each struct member is checked before
+ * cleanup and only freed if it was actually allocated.
+ */
+static void kgsl_destroy_process_private(struct kref *kref)
+{
+	struct kgsl_process_private *private = container_of(kref,
+			struct kgsl_process_private, refcount);
+
+	idr_destroy(&private->mem_idr);
+	idr_destroy(&private->syncsource_idr);
+
+	/* When using global pagetables, do not detach global pagetable */
+	if (private->pagetable->name != KGSL_MMU_GLOBAL_PT)
+		kgsl_mmu_putpagetable(private->pagetable);
+
+	kfree(private);
+}
+
+void
+kgsl_process_private_put(struct kgsl_process_private *private)
+{
+	if (private)
+		kref_put(&private->refcount, kgsl_destroy_process_private);
+}
+
+/**
+ * kgsl_process_private_find() - Find the process associated with the specified
+ * pid
+ * @pid: pid_t of the process to search for
+ *
+ * Return the process struct for the given pid.
+ */
+struct kgsl_process_private *kgsl_process_private_find(pid_t pid)
+{
+	struct kgsl_process_private *p, *private = NULL;
+
+	mutex_lock(&kgsl_driver.process_mutex);
+	list_for_each_entry(p, &kgsl_driver.process_list, list) {
+		if (p->pid == pid) {
+			if (kgsl_process_private_get(p))
+				private = p;
+			break;
+		}
+	}
+	mutex_unlock(&kgsl_driver.process_mutex);
+	return private;
+}
+
+static struct kgsl_process_private *kgsl_process_private_new(
+		struct kgsl_device *device)
+{
+	struct kgsl_process_private *private;
+	pid_t tgid = task_tgid_nr(current);
+
+	/* Search in the process list */
+	list_for_each_entry(private, &kgsl_driver.process_list, list) {
+		if (private->pid == tgid) {
+			if (!kgsl_process_private_get(private))
+				private = ERR_PTR(-EINVAL);
+			return private;
+		}
+	}
+
+	/* Create a new object */
+	private = kzalloc(sizeof(struct kgsl_process_private), GFP_KERNEL);
+	if (private == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	kref_init(&private->refcount);
+
+	private->pid = tgid;
+	get_task_comm(private->comm, current->group_leader);
+
+	spin_lock_init(&private->mem_lock);
+	spin_lock_init(&private->syncsource_lock);
+
+	idr_init(&private->mem_idr);
+	idr_init(&private->syncsource_idr);
+
+	/* Allocate a pagetable for the new process object */
+	private->pagetable = kgsl_mmu_getpagetable(&device->mmu, tgid);
+	if (IS_ERR(private->pagetable)) {
+		int err = PTR_ERR(private->pagetable);
+
+		idr_destroy(&private->mem_idr);
+		idr_destroy(&private->syncsource_idr);
+
+		kfree(private);
+		private = ERR_PTR(err);
+	}
+
+	return private;
+}
+
+static void process_release_memory(struct kgsl_process_private *private)
+{
+	struct kgsl_mem_entry *entry;
+	int next = 0;
+
+	while (1) {
+		spin_lock(&private->mem_lock);
+		entry = idr_get_next(&private->mem_idr, &next);
+		if (entry == NULL) {
+			spin_unlock(&private->mem_lock);
+			break;
+		}
+		/*
+		 * If the free pending flag is not set it means that user space
+		 * did not free its reference to this entry. In that case,
+		 * free a reference to this entry here; other references are
+		 * from within kgsl so they will be freed eventually by kgsl
+		 */
+		if (!entry->pending_free) {
+			entry->pending_free = 1;
+			spin_unlock(&private->mem_lock);
+			kgsl_mem_entry_put(entry);
+		} else {
+			spin_unlock(&private->mem_lock);
+		}
+		next = next + 1;
+	}
+}
+
+static void process_release_sync_sources(struct kgsl_process_private *private)
+{
+	struct kgsl_syncsource *syncsource;
+	int next = 0;
+
+	while (1) {
+		spin_lock(&private->syncsource_lock);
+		syncsource = idr_get_next(&private->syncsource_idr, &next);
+		spin_unlock(&private->syncsource_lock);
+
+		if (syncsource == NULL)
+			break;
+
+		kgsl_syncsource_put(syncsource);
+		next = next + 1;
+	}
+}
+
+static void kgsl_process_private_close(struct kgsl_device_private *dev_priv,
+		struct kgsl_process_private *private)
+{
+	mutex_lock(&kgsl_driver.process_mutex);
+
+	if (--private->fd_count > 0) {
+		mutex_unlock(&kgsl_driver.process_mutex);
+		kgsl_process_private_put(private);
+		return;
+	}
+
+	/*
+	 * If this is the last file descriptor for the process, take down the
+	 * debug directories and garbage collect any outstanding resources
+	 */
+
+	kgsl_process_uninit_sysfs(private);
+	debugfs_remove_recursive(private->debug_root);
+
+	process_release_sync_sources(private);
+
+	/* When using global pagetables, do not detach global pagetable */
+	if (private->pagetable->name != KGSL_MMU_GLOBAL_PT)
+		kgsl_mmu_detach_pagetable(private->pagetable);
+
+	/* Remove the process struct from the master list */
+	list_del(&private->list);
+
+	/*
+	 * Unlock the mutex before releasing the memory - this prevents a
+	 * deadlock with the IOMMU mutex if a page fault occurs
+	 */
+	mutex_unlock(&kgsl_driver.process_mutex);
+
+	process_release_memory(private);
+
+	kgsl_process_private_put(private);
+}
+
+
+static struct kgsl_process_private *kgsl_process_private_open(
+		struct kgsl_device *device)
+{
+	struct kgsl_process_private *private;
+
+	mutex_lock(&kgsl_driver.process_mutex);
+	private = kgsl_process_private_new(device);
+
+	if (IS_ERR(private))
+		goto done;
+
+	/*
+	 * If this is a new process, create the debug directories and add it to
+	 * the process list
+	 */
+
+	if (private->fd_count++ == 0) {
+		kgsl_process_init_sysfs(device, private);
+		kgsl_process_init_debugfs(private);
+
+		list_add(&private->list, &kgsl_driver.process_list);
+	}
+
+done:
+	mutex_unlock(&kgsl_driver.process_mutex);
+	return private;
+}
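+
+/*
+ * Process private lifetime sketch (illustrative): fd_count tracks how many
+ * open file descriptors reference the struct, while the kref tracks every
+ * holder (mem entries, contexts, lookups).  Each open/close pair looks like:
+ *
+ *	private = kgsl_process_private_open(device);	(++fd_count, +1 ref)
+ *	...
+ *	kgsl_process_private_close(dev_priv, private);	(--fd_count, -1 ref)
+ *
+ * Only the last close tears down sysfs/debugfs and releases the process
+ * memory; the struct itself is freed later by kgsl_destroy_process_private()
+ * when the final reference is dropped.
+ */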
+
+static int kgsl_close_device(struct kgsl_device *device)
+{
+	int result = 0;
+
+	mutex_lock(&device->mutex);
+	device->open_count--;
+	if (device->open_count == 0) {
+
+		/* Wait for the active count to go to 0 */
+		kgsl_active_count_wait(device, 0);
+
+		/* Fail if the wait times out */
+		BUG_ON(atomic_read(&device->active_cnt) > 0);
+
+		result = kgsl_pwrctrl_change_state(device, KGSL_STATE_INIT);
+	}
+	mutex_unlock(&device->mutex);
+	return result;
+}
+
+static void device_release_contexts(struct kgsl_device_private *dev_priv)
+{
+	struct kgsl_device *device = dev_priv->device;
+	struct kgsl_context *context;
+	int next = 0;
+	int result = 0;
+
+	while (1) {
+		read_lock(&device->context_lock);
+		context = idr_get_next(&device->context_idr, &next);
+
+		if (context == NULL) {
+			read_unlock(&device->context_lock);
+			break;
+		} else if (context->dev_priv == dev_priv) {
+			/*
+			 * Hold a reference to the context in case somebody
+			 * tries to put it while we are detaching
+			 */
+			result = _kgsl_context_get(context);
+		}
+		read_unlock(&device->context_lock);
+
+		if (result) {
+			kgsl_context_detach(context);
+			kgsl_context_put(context);
+			result = 0;
+		}
+
+		next = next + 1;
+	}
+}
+
+static int kgsl_release(struct inode *inodep, struct file *filep)
+{
+	struct kgsl_device_private *dev_priv = filep->private_data;
+	struct kgsl_device *device = dev_priv->device;
+	int result;
+
+	filep->private_data = NULL;
+
+	/* Release the contexts for the file */
+	device_release_contexts(dev_priv);
+
+	/* Close down the process wide resources for the file */
+	kgsl_process_private_close(dev_priv, dev_priv->process_priv);
+
+	kfree(dev_priv);
+
+	result = kgsl_close_device(device);
+	pm_runtime_put(&device->pdev->dev);
+
+	return result;
+}
+
+static int kgsl_open_device(struct kgsl_device *device)
+{
+	int result = 0;
+
+	mutex_lock(&device->mutex);
+	if (device->open_count == 0) {
+		/*
+		 * active_cnt special case: we are starting up for the first
+		 * time, so use this sequence instead of the kgsl_pwrctrl_wake()
+		 * which will be called by kgsl_active_count_get().
+		 */
+		atomic_inc(&device->active_cnt);
+		kgsl_sharedmem_set(device, &device->memstore, 0, 0,
+				device->memstore.size);
+		kgsl_sharedmem_set(device, &device->scratch, 0, 0,
+				device->scratch.size);
+
+		result = device->ftbl->init(device);
+		if (result)
+			goto err;
+
+		result = device->ftbl->start(device, 0);
+		if (result)
+			goto err;
+		/*
+		 * Make sure the gates are open, so they don't block until
+		 * we start suspend or FT.
+		 */
+		complete_all(&device->hwaccess_gate);
+		kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE);
+		kgsl_active_count_put(device);
+	}
+	device->open_count++;
+err:
+	if (result) {
+		kgsl_pwrctrl_change_state(device, KGSL_STATE_INIT);
+		atomic_dec(&device->active_cnt);
+	}
+
+	mutex_unlock(&device->mutex);
+	return result;
+}
+
+static int kgsl_open(struct inode *inodep, struct file *filep)
+{
+	int result;
+	struct kgsl_device_private *dev_priv;
+	struct kgsl_device *device;
+	unsigned int minor = iminor(inodep);
+
+	device = kgsl_get_minor(minor);
+	BUG_ON(device == NULL);
+
+	result = pm_runtime_get_sync(&device->pdev->dev);
+	if (result < 0) {
+		KGSL_DRV_ERR(device,
+			"Runtime PM: Unable to wake up the device, rc = %d\n",
+			result);
+		return result;
+	}
+	result = 0;
+
+	dev_priv = kzalloc(sizeof(struct kgsl_device_private), GFP_KERNEL);
+	if (dev_priv == NULL) {
+		result = -ENOMEM;
+		goto err;
+	}
+
+	dev_priv->device = device;
+	filep->private_data = dev_priv;
+
+	result = kgsl_open_device(device);
+	if (result)
+		goto err;
+
+	/*
+	 * Get file (per process) private struct. This must be done
+	 * after the first start so that the global pagetable mappings
+	 * are set up before we create the per-process pagetable.
+	 */
+	dev_priv->process_priv = kgsl_process_private_open(device);
+	if (IS_ERR(dev_priv->process_priv)) {
+		result = PTR_ERR(dev_priv->process_priv);
+		kgsl_close_device(device);
+		goto err;
+	}
+
+err:
+	if (result) {
+		filep->private_data = NULL;
+		kfree(dev_priv);
+		pm_runtime_put(&device->pdev->dev);
+	}
+	return result;
+}
+
+#define GPUADDR_IN_MEMDESC(_val, _memdesc) \
+	(((_val) >= (_memdesc)->gpuaddr) && \
+	 ((_val) < ((_memdesc)->gpuaddr + (_memdesc)->size)))
+
+/**
+ * kgsl_sharedmem_find() - Find a gpu memory allocation
+ *
+ * @private: private data for the process to check.
+ * @gpuaddr: start address of the region
+ *
+ * Find a gpu allocation. Caller must kgsl_mem_entry_put()
+ * the returned entry when finished using it.
+ */
+struct kgsl_mem_entry * __must_check
+kgsl_sharedmem_find(struct kgsl_process_private *private, uint64_t gpuaddr)
+{
+	int ret = 0, id;
+	struct kgsl_mem_entry *entry = NULL;
+
+	if (!private)
+		return NULL;
+
+	if (!kgsl_mmu_gpuaddr_in_range(private->pagetable, gpuaddr))
+		return NULL;
+
+	spin_lock(&private->mem_lock);
+	idr_for_each_entry(&private->mem_idr, entry, id) {
+		if (GPUADDR_IN_MEMDESC(gpuaddr, &entry->memdesc)) {
+			ret = kgsl_mem_entry_get(entry);
+			break;
+		}
+	}
+	spin_unlock(&private->mem_lock);
+
+	return (ret == 0) ? NULL : entry;
+}
+EXPORT_SYMBOL(kgsl_sharedmem_find);
+
+struct kgsl_mem_entry * __must_check
+kgsl_sharedmem_find_id_flags(struct kgsl_process_private *process,
+		unsigned int id, uint64_t flags)
+{
+	int count = 0;
+	struct kgsl_mem_entry *entry;
+
+	spin_lock(&process->mem_lock);
+	entry = idr_find(&process->mem_idr, id);
+	if (entry)
+		if (!entry->pending_free &&
+				(flags & entry->memdesc.flags) == flags)
+			count = kgsl_mem_entry_get(entry);
+	spin_unlock(&process->mem_lock);
+
+	return (count == 0) ? NULL : entry;
+}
+
+/**
+ * kgsl_sharedmem_find_id() - find a memory entry by id
+ * @process: the owning process
+ * @id: id to find
+ *
+ * @returns - the mem_entry or NULL
+ *
+ * Caller must kgsl_mem_entry_put() the returned entry, when finished using
+ * it.
+ */
+struct kgsl_mem_entry * __must_check
+kgsl_sharedmem_find_id(struct kgsl_process_private *process, unsigned int id)
+{
+	return kgsl_sharedmem_find_id_flags(process, id, 0);
+}
+
+/**
+ * kgsl_mem_entry_unset_pend() - Unset the pending free flag of an entry
+ * @entry - The memory entry
+ */
+static inline void kgsl_mem_entry_unset_pend(struct kgsl_mem_entry *entry)
+{
+	if (entry == NULL)
+		return;
+	spin_lock(&entry->priv->mem_lock);
+	entry->pending_free = 0;
+	spin_unlock(&entry->priv->mem_lock);
+}
+
+/**
+ * kgsl_mem_entry_set_pend() - Set the pending free flag of a memory entry
+ * @entry - The memory entry
+ *
+ * @returns - true if the pending flag was previously unset, else false
+ *
+ * This function will set the pending free flag if it was previously unset.
+ * Used to prevent a race between ioctls calling free/freememontimestamp
+ * on the same entry. Whichever thread sets the flag first will do the free.
+ */
+static inline bool kgsl_mem_entry_set_pend(struct kgsl_mem_entry *entry)
+{
+	bool ret = false;
+
+	if (entry == NULL)
+		return false;
+
+	spin_lock(&entry->priv->mem_lock);
+	if (!entry->pending_free) {
+		entry->pending_free = 1;
+		ret = true;
+	}
+	spin_unlock(&entry->priv->mem_lock);
+	return ret;
+}
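+
+/*
+ * Typical use of the pending-free flag (illustrative), mirroring
+ * gpumem_free_entry() below: whichever path wins the flag owns the free.
+ *
+ *	if (!kgsl_mem_entry_set_pend(entry))
+ *		return -EBUSY;		(someone else is already freeing it)
+ *	...
+ *	kgsl_mem_entry_put(entry);	(drop the process-owned reference)
+ *
+ * If the free has to be deferred (for example until a timestamp retires) and
+ * scheduling the deferral fails, kgsl_mem_entry_unset_pend() puts the entry
+ * back into a freeable state.
+ */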
+
+/* Call all ioctl sub functions with driver locked */
+long kgsl_ioctl_device_getproperty(struct kgsl_device_private *dev_priv,
+					  unsigned int cmd, void *data)
+{
+	int result = 0;
+	struct kgsl_device_getproperty *param = data;
+
+	switch (param->type) {
+	case KGSL_PROP_VERSION:
+	{
+		struct kgsl_version version;
+
+		if (param->sizebytes != sizeof(version)) {
+			result = -EINVAL;
+			break;
+		}
+
+		version.drv_major = KGSL_VERSION_MAJOR;
+		version.drv_minor = KGSL_VERSION_MINOR;
+		version.dev_major = dev_priv->device->ver_major;
+		version.dev_minor = dev_priv->device->ver_minor;
+
+		if (copy_to_user(param->value, &version, sizeof(version)))
+			result = -EFAULT;
+
+		break;
+	}
+	case KGSL_PROP_GPU_RESET_STAT:
+	{
+		/* Return reset status of given context and clear it */
+		uint32_t id;
+		struct kgsl_context *context;
+
+		if (param->sizebytes != sizeof(unsigned int)) {
+			result = -EINVAL;
+			break;
+		}
+		/* We expect the value passed in to contain the context id */
+		if (copy_from_user(&id, param->value,
+			sizeof(unsigned int))) {
+			result = -EFAULT;
+			break;
+		}
+		context = kgsl_context_get_owner(dev_priv, id);
+		if (!context) {
+			result = -EINVAL;
+			break;
+		}
+		/*
+		 * Copy the reset status to value which also serves as
+		 * the out parameter
+		 */
+		if (copy_to_user(param->value, &(context->reset_status),
+			sizeof(unsigned int)))
+			result = -EFAULT;
+		else {
+			/* Clear reset status once it's been queried */
+			context->reset_status = KGSL_CTX_STAT_NO_ERROR;
+		}
+
+		kgsl_context_put(context);
+		break;
+	}
+	default:
+		if (is_compat_task())
+			result = dev_priv->device->ftbl->getproperty_compat(
+					dev_priv->device, param->type,
+					param->value, param->sizebytes);
+		else
+			result = dev_priv->device->ftbl->getproperty(
+					dev_priv->device, param->type,
+					param->value, param->sizebytes);
+	}
+
+
+}
+
+long kgsl_ioctl_device_setproperty(struct kgsl_device_private *dev_priv,
+					  unsigned int cmd, void *data)
+{
+	int result = 0;
+	/* The getproperty struct is reused for setproperty too */
+	struct kgsl_device_getproperty *param = data;
+
+	/* Reroute to compat version if coming from compat_ioctl */
+	if (is_compat_task())
+		result = dev_priv->device->ftbl->setproperty_compat(
+			dev_priv, param->type, param->value,
+			param->sizebytes);
+	else if (dev_priv->device->ftbl->setproperty)
+		result = dev_priv->device->ftbl->setproperty(
+			dev_priv, param->type, param->value,
+			param->sizebytes);
+
+	return result;
+}
+
+long kgsl_ioctl_device_waittimestamp_ctxtid(
+		struct kgsl_device_private *dev_priv, unsigned int cmd,
+		void *data)
+{
+	struct kgsl_device_waittimestamp_ctxtid *param = data;
+	struct kgsl_device *device = dev_priv->device;
+	long result = -EINVAL;
+	unsigned int temp_cur_ts = 0;
+	struct kgsl_context *context;
+
+	context = kgsl_context_get_owner(dev_priv, param->context_id);
+	if (context == NULL)
+		return result;
+
+	kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED,
+		&temp_cur_ts);
+
+	trace_kgsl_waittimestamp_entry(device, context->id, temp_cur_ts,
+		param->timestamp, param->timeout);
+
+	result = device->ftbl->waittimestamp(device, context, param->timestamp,
+		param->timeout);
+
+	kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED,
+		&temp_cur_ts);
+	trace_kgsl_waittimestamp_exit(device, temp_cur_ts, result);
+
+	kgsl_context_put(context);
+
+	return result;
+}
+
+long kgsl_ioctl_rb_issueibcmds(struct kgsl_device_private *dev_priv,
+				      unsigned int cmd, void *data)
+{
+	struct kgsl_ringbuffer_issueibcmds *param = data;
+	struct kgsl_device *device = dev_priv->device;
+	struct kgsl_context *context;
+	struct kgsl_drawobj *drawobj;
+	struct kgsl_drawobj_cmd *cmdobj;
+	long result = -EINVAL;
+
+	/* The legacy functions don't support synchronization commands */
+	if ((param->flags & (KGSL_DRAWOBJ_SYNC | KGSL_DRAWOBJ_MARKER)))
+		return -EINVAL;
+
+	/* Sanity check the number of IBs */
+	if (param->flags & KGSL_DRAWOBJ_SUBMIT_IB_LIST &&
+			(param->numibs == 0 || param->numibs > KGSL_MAX_NUMIBS))
+		return -EINVAL;
+
+	/* Get the context */
+	context = kgsl_context_get_owner(dev_priv, param->drawctxt_id);
+	if (context == NULL)
+		return -EINVAL;
+
+	cmdobj = kgsl_drawobj_cmd_create(device, context, param->flags,
+					CMDOBJ_TYPE);
+	if (IS_ERR(cmdobj)) {
+		kgsl_context_put(context);
+		return PTR_ERR(cmdobj);
+	}
+
+	drawobj = DRAWOBJ(cmdobj);
+
+	if (param->flags & KGSL_DRAWOBJ_SUBMIT_IB_LIST)
+		result = kgsl_drawobj_cmd_add_ibdesc_list(device, cmdobj,
+			(void __user *) param->ibdesc_addr,
+			param->numibs);
+	else {
+		struct kgsl_ibdesc ibdesc;
+		/* Ultra legacy path */
+
+		ibdesc.gpuaddr = param->ibdesc_addr;
+		ibdesc.sizedwords = param->numibs;
+		ibdesc.ctrl = 0;
+
+		result = kgsl_drawobj_cmd_add_ibdesc(device, cmdobj, &ibdesc);
+	}
+
+	if (result == 0)
+		result = dev_priv->device->ftbl->queue_cmds(dev_priv, context,
+				&drawobj, 1, &param->timestamp);
+
+	/*
+	 * -EPROTO is a "success" error - it just tells the user that the
+	 * context had previously faulted
+	 */
+	if (result && result != -EPROTO)
+		kgsl_drawobj_destroy(drawobj);
+
+	kgsl_context_put(context);
+	return result;
+}
+
+/* Returns 0 on failure.  Returns command type(s) on success */
+static unsigned int _process_command_input(struct kgsl_device *device,
+		unsigned int flags, unsigned int numcmds,
+		unsigned int numobjs, unsigned int numsyncs)
+{
+	if (numcmds > KGSL_MAX_NUMIBS ||
+			numobjs > KGSL_MAX_NUMIBS ||
+			numsyncs > KGSL_MAX_SYNCPOINTS)
+		return 0;
+
+	/*
+	 * The SYNC bit is supposed to identify a dummy sync object
+	 * so warn the user if they specified any IBs with it.
+	 * A MARKER command can either have IBs or not but if the
+	 * command has 0 IBs it is automatically assumed to be a marker.
+	 */
+
+	/* If they specify the flag, go with what they say */
+	if (flags & KGSL_DRAWOBJ_MARKER)
+		return MARKEROBJ_TYPE;
+	else if (flags & KGSL_DRAWOBJ_SYNC)
+		return SYNCOBJ_TYPE;
+
+	/* If not, deduce what they meant */
+	if (numsyncs && numcmds)
+		return SYNCOBJ_TYPE | CMDOBJ_TYPE;
+	else if (numsyncs)
+		return SYNCOBJ_TYPE;
+	else if (numcmds)
+		return CMDOBJ_TYPE;
+
+	return MARKEROBJ_TYPE;
+}
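+
+/*
+ * Examples of the deduction above (illustrative):
+ *
+ *	flags=KGSL_DRAWOBJ_MARKER, numcmds=2, numsyncs=0 -> MARKEROBJ_TYPE
+ *	flags=0, numcmds=3, numsyncs=0                   -> CMDOBJ_TYPE
+ *	flags=0, numcmds=2, numsyncs=1                   -> SYNCOBJ_TYPE | CMDOBJ_TYPE
+ *	flags=0, numcmds=0, numsyncs=0                   -> MARKEROBJ_TYPE
+ *	flags=0, numcmds=KGSL_MAX_NUMIBS + 1             -> 0 (rejected)
+ */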
+
+long kgsl_ioctl_submit_commands(struct kgsl_device_private *dev_priv,
+				      unsigned int cmd, void *data)
+{
+	struct kgsl_submit_commands *param = data;
+	struct kgsl_device *device = dev_priv->device;
+	struct kgsl_context *context;
+	struct kgsl_drawobj *drawobj[2];
+	unsigned int type;
+	long result;
+	unsigned int i = 0;
+
+	type = _process_command_input(device, param->flags, param->numcmds, 0,
+			param->numsyncs);
+	if (!type)
+		return -EINVAL;
+
+	context = kgsl_context_get_owner(dev_priv, param->context_id);
+	if (context == NULL)
+		return -EINVAL;
+
+	if (type & SYNCOBJ_TYPE) {
+		struct kgsl_drawobj_sync *syncobj =
+				kgsl_drawobj_sync_create(device, context);
+		if (IS_ERR(syncobj)) {
+			result = PTR_ERR(syncobj);
+			goto done;
+		}
+
+		drawobj[i++] = DRAWOBJ(syncobj);
+
+		result = kgsl_drawobj_sync_add_syncpoints(device, syncobj,
+				param->synclist, param->numsyncs);
+		if (result)
+			goto done;
+	}
+
+	if (type & (CMDOBJ_TYPE | MARKEROBJ_TYPE)) {
+		struct kgsl_drawobj_cmd *cmdobj =
+				kgsl_drawobj_cmd_create(device,
+					context, param->flags, type);
+		if (IS_ERR(cmdobj)) {
+			result = PTR_ERR(cmdobj);
+			goto done;
+		}
+
+		drawobj[i++] = DRAWOBJ(cmdobj);
+
+		result = kgsl_drawobj_cmd_add_ibdesc_list(device, cmdobj,
+				param->cmdlist, param->numcmds);
+		if (result)
+			goto done;
+
+		/* If no profiling buffer was specified, clear the flag */
+		if (cmdobj->profiling_buf_entry == NULL)
+			DRAWOBJ(cmdobj)->flags &= ~KGSL_DRAWOBJ_PROFILING;
+	}
+
+	result = device->ftbl->queue_cmds(dev_priv, context, drawobj,
+			i, &param->timestamp);
+
+done:
+	/*
+	 * -EPROTO is a "success" error - it just tells the user that the
+	 * context had previously faulted
+	 */
+	if (result && result != -EPROTO)
+		while (i--)
+			kgsl_drawobj_destroy(drawobj[i]);
+
+	kgsl_context_put(context);
+	return result;
+}
+
+long kgsl_ioctl_gpu_command(struct kgsl_device_private *dev_priv,
+		unsigned int cmd, void *data)
+{
+	struct kgsl_gpu_command *param = data;
+	struct kgsl_device *device = dev_priv->device;
+	struct kgsl_context *context;
+	struct kgsl_drawobj *drawobj[2];
+	unsigned int type;
+	long result;
+	unsigned int i = 0;
+
+	type = _process_command_input(device, param->flags, param->numcmds,
+			param->numobjs, param->numsyncs);
+	if (!type)
+		return -EINVAL;
+
+	context = kgsl_context_get_owner(dev_priv, param->context_id);
+	if (context == NULL)
+		return -EINVAL;
+
+	if (type & SYNCOBJ_TYPE) {
+		struct kgsl_drawobj_sync *syncobj =
+				kgsl_drawobj_sync_create(device, context);
+
+		if (IS_ERR(syncobj)) {
+			result = PTR_ERR(syncobj);
+			goto done;
+		}
+
+		drawobj[i++] = DRAWOBJ(syncobj);
+
+		result = kgsl_drawobj_sync_add_synclist(device, syncobj,
+				to_user_ptr(param->synclist),
+				param->syncsize, param->numsyncs);
+		if (result)
+			goto done;
+	}
+
+	if (type & (CMDOBJ_TYPE | MARKEROBJ_TYPE)) {
+		struct kgsl_drawobj_cmd *cmdobj =
+				kgsl_drawobj_cmd_create(device,
+					context, param->flags, type);
+
+		if (IS_ERR(cmdobj)) {
+			result = PTR_ERR(cmdobj);
+			goto done;
+		}
+
+		drawobj[i++] = DRAWOBJ(cmdobj);
+
+		result = kgsl_drawobj_cmd_add_cmdlist(device, cmdobj,
+			to_user_ptr(param->cmdlist),
+			param->cmdsize, param->numcmds);
+		if (result)
+			goto done;
+
+		result = kgsl_drawobj_cmd_add_memlist(device, cmdobj,
+			to_user_ptr(param->objlist),
+			param->objsize, param->numobjs);
+		if (result)
+			goto done;
+
+		/* If no profiling buffer was specified, clear the flag */
+		if (cmdobj->profiling_buf_entry == NULL)
+			DRAWOBJ(cmdobj)->flags &= ~KGSL_DRAWOBJ_PROFILING;
+	}
+
+	result = device->ftbl->queue_cmds(dev_priv, context, drawobj,
+				i, &param->timestamp);
+
+done:
+	/*
+	 * -EPROTO is a "success" error - it just tells the user that the
+	 * context had previously faulted
+	 */
+	if (result && result != -EPROTO)
+		while (i--)
+			kgsl_drawobj_destroy(drawobj[i]);
+
+	kgsl_context_put(context);
+	return result;
+}
+
+long kgsl_ioctl_cmdstream_readtimestamp_ctxtid(struct kgsl_device_private
+						*dev_priv, unsigned int cmd,
+						void *data)
+{
+	struct kgsl_cmdstream_readtimestamp_ctxtid *param = data;
+	struct kgsl_device *device = dev_priv->device;
+	struct kgsl_context *context;
+	long result = -EINVAL;
+
+	mutex_lock(&device->mutex);
+	context = kgsl_context_get_owner(dev_priv, param->context_id);
+
+	if (context) {
+		result = kgsl_readtimestamp(device, context,
+			param->type, &param->timestamp);
+
+		trace_kgsl_readtimestamp(device, context->id,
+			param->type, param->timestamp);
+	}
+
+	kgsl_context_put(context);
+	mutex_unlock(&device->mutex);
+	return result;
+}
+
+long kgsl_ioctl_drawctxt_create(struct kgsl_device_private *dev_priv,
+					unsigned int cmd, void *data)
+{
+	int result = 0;
+	struct kgsl_drawctxt_create *param = data;
+	struct kgsl_context *context = NULL;
+	struct kgsl_device *device = dev_priv->device;
+
+	context = device->ftbl->drawctxt_create(dev_priv, &param->flags);
+	if (IS_ERR(context)) {
+		result = PTR_ERR(context);
+		goto done;
+	}
+	trace_kgsl_context_create(dev_priv->device, context, param->flags);
+
+	/* Commit the pointer to the context in context_idr */
+	write_lock(&device->context_lock);
+	idr_replace(&device->context_idr, context, context->id);
+	write_unlock(&device->context_lock);
+
+	param->drawctxt_id = context->id;
+done:
+	return result;
+}
+
+long kgsl_ioctl_drawctxt_destroy(struct kgsl_device_private *dev_priv,
+					unsigned int cmd, void *data)
+{
+	struct kgsl_drawctxt_destroy *param = data;
+	struct kgsl_context *context;
+
+	context = kgsl_context_get_owner(dev_priv, param->drawctxt_id);
+	if (context == NULL)
+		return -EINVAL;
+
+	kgsl_context_detach(context);
+	kgsl_context_put(context);
+
+	return 0;
+}
+
+static long gpumem_free_entry(struct kgsl_mem_entry *entry)
+{
+	pid_t ptname = 0;
+
+	if (!kgsl_mem_entry_set_pend(entry))
+		return -EBUSY;
+
+	trace_kgsl_mem_free(entry);
+
+	if (entry->memdesc.pagetable != NULL)
+		ptname = entry->memdesc.pagetable->name;
+
+	kgsl_memfree_add(entry->priv->pid, ptname, entry->memdesc.gpuaddr,
+		entry->memdesc.size, entry->memdesc.flags);
+
+	kgsl_mem_entry_put(entry);
+
+	return 0;
+}
+
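+/*
+ * Event callback for a deferred free: runs once the context has retired the
+ * requested timestamp and drops the memory entry's reference so the
+ * allocation can actually be released.
+ */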
+static void gpumem_free_func(struct kgsl_device *device,
+		struct kgsl_event_group *group, void *priv, int ret)
+{
+	struct kgsl_context *context = group->context;
+	struct kgsl_mem_entry *entry = priv;
+	unsigned int timestamp;
+
+	kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, &timestamp);
+
+	/* Free the memory for all event types */
+	trace_kgsl_mem_timestamp_free(device, entry, KGSL_CONTEXT_ID(context),
+		timestamp, 0);
+	kgsl_mem_entry_put(entry);
+}
+
+static long gpumem_free_entry_on_timestamp(struct kgsl_device *device,
+		struct kgsl_mem_entry *entry,
+		struct kgsl_context *context, unsigned int timestamp)
+{
+	int ret;
+	unsigned int temp;
+
+	if (!kgsl_mem_entry_set_pend(entry))
+		return -EBUSY;
+
+	kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, &temp);
+	trace_kgsl_mem_timestamp_queue(device, entry, context->id, temp,
+		timestamp);
+	ret = kgsl_add_event(device, &context->events,
+		timestamp, gpumem_free_func, entry);
+
+	if (ret)
+		kgsl_mem_entry_unset_pend(entry);
+
+	return ret;
+}
+
+long kgsl_ioctl_sharedmem_free(struct kgsl_device_private *dev_priv,
+					unsigned int cmd, void *data)
+{
+	struct kgsl_sharedmem_free *param = data;
+	struct kgsl_process_private *private = dev_priv->process_priv;
+	struct kgsl_mem_entry *entry;
+	long ret;
+
+	entry = kgsl_sharedmem_find(private, (uint64_t) param->gpuaddr);
+	if (entry == NULL)
+		return -EINVAL;
+
+	ret = gpumem_free_entry(entry);
+	kgsl_mem_entry_put(entry);
+
+	return ret;
+}
+
+long kgsl_ioctl_gpumem_free_id(struct kgsl_device_private *dev_priv,
+					unsigned int cmd, void *data)
+{
+	struct kgsl_gpumem_free_id *param = data;
+	struct kgsl_process_private *private = dev_priv->process_priv;
+	struct kgsl_mem_entry *entry;
+	long ret;
+
+	entry = kgsl_sharedmem_find_id(private, param->id);
+	if (entry == NULL)
+		return -EINVAL;
+
+	ret = gpumem_free_entry(entry);
+	kgsl_mem_entry_put(entry);
+
+	return ret;
+}
+
+static long gpuobj_free_on_timestamp(struct kgsl_device_private *dev_priv,
+		struct kgsl_mem_entry *entry, struct kgsl_gpuobj_free *param)
+{
+	struct kgsl_gpu_event_timestamp event;
+	struct kgsl_context *context;
+	long ret;
+
+	memset(&event, 0, sizeof(event));
+
+	ret = _copy_from_user(&event, to_user_ptr(param->priv),
+		sizeof(event), param->len);
+	if (ret)
+		return ret;
+
+	if (event.context_id == 0)
+		return -EINVAL;
+
+	context = kgsl_context_get_owner(dev_priv, event.context_id);
+	if (context == NULL)
+		return -EINVAL;
+
+	ret = gpumem_free_entry_on_timestamp(dev_priv->device, entry, context,
+		event.timestamp);
+
+	kgsl_context_put(context);
+	return ret;
+}
+
+static void gpuobj_free_fence_func(void *priv)
+{
+	struct kgsl_mem_entry *entry = priv;
+
+	INIT_WORK(&entry->work, _deferred_put);
+	queue_work(kgsl_driver.mem_workqueue, &entry->work);
+}
+
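+/*
+ * Free a GPU object once the user supplied sync fence signals. The entry is
+ * marked pending so it cannot be freed twice; if the fence has already
+ * signaled, fall back to an immediate free.
+ */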
+static long gpuobj_free_on_fence(struct kgsl_device_private *dev_priv,
+		struct kgsl_mem_entry *entry, struct kgsl_gpuobj_free *param)
+{
+	struct kgsl_sync_fence_waiter *handle;
+	struct kgsl_gpu_event_fence event;
+	long ret;
+
+	if (!kgsl_mem_entry_set_pend(entry))
+		return -EBUSY;
+
+	memset(&event, 0, sizeof(event));
+
+	ret = _copy_from_user(&event, to_user_ptr(param->priv),
+		sizeof(event), param->len);
+	if (ret) {
+		kgsl_mem_entry_unset_pend(entry);
+		return ret;
+	}
+
+	if (event.fd < 0) {
+		kgsl_mem_entry_unset_pend(entry);
+		return -EINVAL;
+	}
+
+	handle = kgsl_sync_fence_async_wait(event.fd,
+		gpuobj_free_fence_func, entry);
+
+	/* if handle is NULL the fence has already signaled */
+	if (handle == NULL)
+		return gpumem_free_entry(entry);
+
+	if (IS_ERR(handle)) {
+		kgsl_mem_entry_unset_pend(entry);
+		return PTR_ERR(handle);
+	}
+
+	return 0;
+}
+
+long kgsl_ioctl_gpuobj_free(struct kgsl_device_private *dev_priv,
+		unsigned int cmd, void *data)
+{
+	struct kgsl_gpuobj_free *param = data;
+	struct kgsl_process_private *private = dev_priv->process_priv;
+	struct kgsl_mem_entry *entry;
+	long ret;
+
+	entry = kgsl_sharedmem_find_id(private, param->id);
+	if (entry == NULL)
+		return -EINVAL;
+
+	/* If no event is specified then free immediately */
+	if (!(param->flags & KGSL_GPUOBJ_FREE_ON_EVENT))
+		ret = gpumem_free_entry(entry);
+	else if (param->type == KGSL_GPU_EVENT_TIMESTAMP)
+		ret = gpuobj_free_on_timestamp(dev_priv, entry, param);
+	else if (param->type == KGSL_GPU_EVENT_FENCE)
+		ret = gpuobj_free_on_fence(dev_priv, entry, param);
+	else
+		ret = -EINVAL;
+
+	kgsl_mem_entry_put(entry);
+	return ret;
+}
+
+long kgsl_ioctl_cmdstream_freememontimestamp_ctxtid(
+		struct kgsl_device_private *dev_priv,
+		unsigned int cmd, void *data)
+{
+	struct kgsl_cmdstream_freememontimestamp_ctxtid *param = data;
+	struct kgsl_context *context = NULL;
+	struct kgsl_mem_entry *entry;
+	long ret = -EINVAL;
+
+	if (param->type != KGSL_TIMESTAMP_RETIRED)
+		return -EINVAL;
+
+	context = kgsl_context_get_owner(dev_priv, param->context_id);
+	if (context == NULL)
+		return -EINVAL;
+
+	entry = kgsl_sharedmem_find(dev_priv->process_priv,
+		(uint64_t) param->gpuaddr);
+	if (entry == NULL) {
+		kgsl_context_put(context);
+		return -EINVAL;
+	}
+
+	ret = gpumem_free_entry_on_timestamp(dev_priv->device, entry,
+		context, param->timestamp);
+
+	kgsl_mem_entry_put(entry);
+	kgsl_context_put(context);
+
+	return ret;
+}
+
+static inline int _check_region(unsigned long start, unsigned long size,
+				uint64_t len)
+{
+	uint64_t end = ((uint64_t) start) + size;
+
+	return (end > len);
+}
+
+static int check_vma_flags(struct vm_area_struct *vma,
+		unsigned int flags)
+{
+	unsigned long flags_requested = (VM_READ | VM_WRITE);
+
+	if (flags & KGSL_MEMFLAGS_GPUREADONLY)
+		flags_requested &= ~VM_WRITE;
+
+	if ((vma->vm_flags & flags_requested) == flags_requested)
+		return 0;
+
+	return -EFAULT;
+}
+
+static int check_vma(struct vm_area_struct *vma, struct file *vmfile,
+		struct kgsl_memdesc *memdesc)
+{
+	if (vma == NULL || vma->vm_file != vmfile)
+		return -EINVAL;
+
+	/* userspace may not know the size, in which case use the whole vma */
+	if (memdesc->size == 0)
+		memdesc->size = vma->vm_end - vma->vm_start;
+	/* range checking */
+	if (vma->vm_start != memdesc->useraddr ||
+		(memdesc->useraddr + memdesc->size) != vma->vm_end)
+		return -EINVAL;
+	return check_vma_flags(vma, memdesc->flags);
+}
+
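+/*
+ * Pin the user pages backing memdesc->useraddr with get_user_pages() and
+ * build an sg_table describing them so the range can be mapped into the
+ * GPU MMU. When a vmfile is given, the backing VMA is validated first.
+ */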
+static int memdesc_sg_virt(struct kgsl_memdesc *memdesc, struct file *vmfile)
+{
+	int ret = 0;
+	long npages = 0, i;
+	size_t sglen = (size_t) (memdesc->size / PAGE_SIZE);
+	struct page **pages = NULL;
+	int write = ((memdesc->flags & KGSL_MEMFLAGS_GPUREADONLY) ? 0 :
+								FOLL_WRITE);
+
+	if (sglen == 0 || sglen >= LONG_MAX)
+		return -EINVAL;
+
+	pages = kgsl_malloc(sglen * sizeof(struct page *));
+	if (pages == NULL)
+		return -ENOMEM;
+
+	memdesc->sgt = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
+	if (memdesc->sgt == NULL) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	down_read(&current->mm->mmap_sem);
+	/* If we have vmfile, make sure we map the correct vma and map it all */
+	if (vmfile != NULL)
+		ret = check_vma(find_vma(current->mm, memdesc->useraddr),
+				vmfile, memdesc);
+
+	if (ret == 0) {
+		npages = get_user_pages(memdesc->useraddr,
+					sglen, write, pages, NULL);
+		ret = (npages < 0) ? (int)npages : 0;
+	}
+	up_read(&current->mm->mmap_sem);
+
+	if (ret)
+		goto out;
+
+	if ((unsigned long) npages != sglen) {
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = sg_alloc_table_from_pages(memdesc->sgt, pages, npages,
+					0, memdesc->size, GFP_KERNEL);
+out:
+	if (ret) {
+		for (i = 0; i < npages; i++)
+			put_page(pages[i]);
+
+		kfree(memdesc->sgt);
+		memdesc->sgt = NULL;
+	}
+	kgsl_free(pages);
+	return ret;
+}
+
+static int kgsl_setup_anon_useraddr(struct kgsl_pagetable *pagetable,
+	struct kgsl_mem_entry *entry, unsigned long hostptr,
+	size_t offset, size_t size)
+{
+	/* Map an anonymous memory chunk */
+
+	if (size == 0 || offset != 0 ||
+		!IS_ALIGNED(size, PAGE_SIZE))
+		return -EINVAL;
+
+	entry->memdesc.pagetable = pagetable;
+	entry->memdesc.size = (uint64_t) size;
+	entry->memdesc.useraddr = hostptr;
+	entry->memdesc.flags |= KGSL_MEMFLAGS_USERMEM_ADDR;
+
+	if (kgsl_memdesc_use_cpu_map(&entry->memdesc)) {
+		int ret;
+
+		/* Register the address in the database */
+		ret = kgsl_mmu_set_svm_region(pagetable,
+			(uint64_t) entry->memdesc.useraddr, (uint64_t) size);
+
+		if (ret)
+			return ret;
+
+		entry->memdesc.gpuaddr = (uint64_t)  entry->memdesc.useraddr;
+	}
+
+	return memdesc_sg_virt(&entry->memdesc, NULL);
+}
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+static int match_file(const void *p, struct file *file, unsigned int fd)
+{
+	/*
+	 * We must return fd + 1 because iterate_fd stops searching on
+	 * non-zero return, but 0 is a valid fd.
+	 */
+	return (p == file) ? (fd + 1) : 0;
+}
+
+static void _setup_cache_mode(struct kgsl_mem_entry *entry,
+		struct vm_area_struct *vma)
+{
+	unsigned int mode;
+	pgprot_t pgprot = vma->vm_page_prot;
+
+	if (pgprot_val(pgprot) == pgprot_val(pgprot_noncached(pgprot)))
+		mode = KGSL_CACHEMODE_UNCACHED;
+	else if (pgprot_val(pgprot) == pgprot_val(pgprot_writecombine(pgprot)))
+		mode = KGSL_CACHEMODE_WRITECOMBINE;
+	else
+		mode = KGSL_CACHEMODE_WRITEBACK;
+
+	entry->memdesc.flags |= (mode << KGSL_CACHEMODE_SHIFT);
+}
+
+static int kgsl_setup_dma_buf(struct kgsl_device *device,
+				struct kgsl_pagetable *pagetable,
+				struct kgsl_mem_entry *entry,
+				struct dma_buf *dmabuf);
+
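+/*
+ * Resolve the VMA that contains hostptr, find the file descriptor backing it
+ * and, if it refers to a dma-buf, import the buffer instead of pinning the
+ * pages directly.
+ */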
+static int kgsl_setup_dmabuf_useraddr(struct kgsl_device *device,
+		struct kgsl_pagetable *pagetable,
+		struct kgsl_mem_entry *entry, unsigned long hostptr)
+{
+	struct vm_area_struct *vma;
+	struct dma_buf *dmabuf = NULL;
+	int ret;
+
+	/*
+	 * Find the VMA containing this pointer and figure out if it
+	 * is a dma-buf.
+	 */
+	down_read(&current->mm->mmap_sem);
+	vma = find_vma(current->mm, hostptr);
+
+	if (vma && vma->vm_file) {
+		int fd;
+
+		ret = check_vma_flags(vma, entry->memdesc.flags);
+		if (ret) {
+			up_read(&current->mm->mmap_sem);
+			return ret;
+		}
+
+		/*
+		 * Check to see that this isn't our own memory that we have
+		 * already mapped
+		 */
+		if (vma->vm_file->f_op == &kgsl_fops) {
+			up_read(&current->mm->mmap_sem);
+			return -EFAULT;
+		}
+
+		/* Look for the fd that matches the vma file */
+		fd = iterate_fd(current->files, 0, match_file, vma->vm_file);
+		if (fd != 0)
+			dmabuf = dma_buf_get(fd - 1);
+	}
+	up_read(&current->mm->mmap_sem);
+
+	if (IS_ERR_OR_NULL(dmabuf))
+		return dmabuf ? PTR_ERR(dmabuf) : -ENODEV;
+
+	ret = kgsl_setup_dma_buf(device, pagetable, entry, dmabuf);
+	if (ret) {
+		dma_buf_put(dmabuf);
+		return ret;
+	}
+
+	/* Setup the user addr/cache mode for cache operations */
+	entry->memdesc.useraddr = hostptr;
+	_setup_cache_mode(entry, vma);
+
+	return 0;
+}
+#else
+static int kgsl_setup_dmabuf_useraddr(struct kgsl_device *device,
+		struct kgsl_pagetable *pagetable,
+		struct kgsl_mem_entry *entry, unsigned long hostptr)
+{
+	return -ENODEV;
+}
+#endif
+
+static int kgsl_setup_useraddr(struct kgsl_device *device,
+		struct kgsl_pagetable *pagetable,
+		struct kgsl_mem_entry *entry,
+		unsigned long hostptr, size_t offset, size_t size)
+{
+	int ret;
+
+	if (hostptr == 0 || !IS_ALIGNED(hostptr, PAGE_SIZE))
+		return -EINVAL;
+
+	/* Try to set up a dmabuf - if it returns -ENODEV assume anonymous */
+	ret = kgsl_setup_dmabuf_useraddr(device, pagetable, entry, hostptr);
+	if (ret != -ENODEV)
+		return ret;
+
+	/* Okay - lets go legacy */
+	return kgsl_setup_anon_useraddr(pagetable, entry,
+		hostptr, offset, size);
+}
+
+static long _gpuobj_map_useraddr(struct kgsl_device *device,
+		struct kgsl_pagetable *pagetable,
+		struct kgsl_mem_entry *entry,
+		struct kgsl_gpuobj_import *param)
+{
+	struct kgsl_gpuobj_import_useraddr useraddr;
+	int ret;
+
+	param->flags &= KGSL_MEMFLAGS_GPUREADONLY
+		| KGSL_CACHEMODE_MASK
+		| KGSL_MEMTYPE_MASK
+		| KGSL_MEMFLAGS_FORCE_32BIT;
+
+	/* Specifying SECURE is an explicit error */
+	if (param->flags & KGSL_MEMFLAGS_SECURE)
+		return -ENOTSUPP;
+
+	ret = _copy_from_user(&useraddr,
+		to_user_ptr(param->priv), sizeof(useraddr),
+		param->priv_len);
+	if (ret)
+		return ret;
+
+	/* Verify that the virtaddr and len are within bounds */
+	if (useraddr.virtaddr > ULONG_MAX)
+		return -EINVAL;
+
+	return kgsl_setup_useraddr(device, pagetable, entry,
+		(unsigned long) useraddr.virtaddr, 0, param->priv_len);
+}
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+static long _gpuobj_map_dma_buf(struct kgsl_device *device,
+		struct kgsl_pagetable *pagetable,
+		struct kgsl_mem_entry *entry,
+		struct kgsl_gpuobj_import *param,
+		int *fd)
+{
+	struct kgsl_gpuobj_import_dma_buf buf;
+	struct dma_buf *dmabuf;
+	int ret;
+
+	/*
+	 * If content protection is not enabled and secure buffer
+	 * is requested to be mapped return error.
+	 */
+	if (entry->memdesc.flags & KGSL_MEMFLAGS_SECURE) {
+		if (!kgsl_mmu_is_secured(&device->mmu)) {
+			dev_WARN_ONCE(device->dev, 1,
+				"Secure buffer not supported");
+			return -ENOTSUPP;
+		}
+
+		entry->memdesc.priv |= KGSL_MEMDESC_SECURE;
+	}
+
+	ret = _copy_from_user(&buf, to_user_ptr(param->priv),
+			sizeof(buf), param->priv_len);
+	if (ret)
+		return ret;
+
+	if (buf.fd < 0)
+		return -EINVAL;
+
+	*fd = buf.fd;
+	dmabuf = dma_buf_get(buf.fd);
+
+	if (IS_ERR_OR_NULL(dmabuf))
+		return (dmabuf == NULL) ? -EINVAL : PTR_ERR(dmabuf);
+
+	ret = kgsl_setup_dma_buf(device, pagetable, entry, dmabuf);
+	if (ret)
+		dma_buf_put(dmabuf);
+
+	return ret;
+}
+#else
+static long _gpuobj_map_dma_buf(struct kgsl_device *device,
+		struct kgsl_pagetable *pagetable,
+		struct kgsl_mem_entry *entry,
+		struct kgsl_gpuobj_import *param,
+		int *fd)
+{
+	return -EINVAL;
+}
+#endif
+
+long kgsl_ioctl_gpuobj_import(struct kgsl_device_private *dev_priv,
+		unsigned int cmd, void *data)
+{
+	struct kgsl_process_private *private = dev_priv->process_priv;
+	struct kgsl_gpuobj_import *param = data;
+	struct kgsl_mem_entry *entry;
+	int ret, fd = -1;
+	struct kgsl_mmu *mmu = &dev_priv->device->mmu;
+
+	entry = kgsl_mem_entry_create();
+	if (entry == NULL)
+		return -ENOMEM;
+
+	param->flags &= KGSL_MEMFLAGS_GPUREADONLY
+			| KGSL_MEMTYPE_MASK
+			| KGSL_MEMALIGN_MASK
+			| KGSL_MEMFLAGS_USE_CPU_MAP
+			| KGSL_MEMFLAGS_SECURE
+			| KGSL_MEMFLAGS_FORCE_32BIT;
+
+	entry->memdesc.flags = param->flags;
+
+	if (MMU_FEATURE(mmu, KGSL_MMU_NEED_GUARD_PAGE))
+		entry->memdesc.priv |= KGSL_MEMDESC_GUARD_PAGE;
+
+	if (param->type == KGSL_USER_MEM_TYPE_ADDR)
+		ret = _gpuobj_map_useraddr(dev_priv->device, private->pagetable,
+			entry, param);
+	else if (param->type == KGSL_USER_MEM_TYPE_DMABUF)
+		ret = _gpuobj_map_dma_buf(dev_priv->device, private->pagetable,
+			entry, param, &fd);
+	else
+		ret = -ENOTSUPP;
+
+	if (ret)
+		goto out;
+
+	if (entry->memdesc.size >= SZ_1M)
+		kgsl_memdesc_set_align(&entry->memdesc, ilog2(SZ_1M));
+	else if (entry->memdesc.size >= SZ_64K)
+		kgsl_memdesc_set_align(&entry->memdesc, ilog2(SZ_64K));
+
+	param->flags = entry->memdesc.flags;
+
+	ret = kgsl_mem_entry_attach_process(dev_priv->device, private, entry);
+	if (ret)
+		goto unmap;
+
+	param->id = entry->id;
+
+	KGSL_STATS_ADD(entry->memdesc.size, &kgsl_driver.stats.mapped,
+		&kgsl_driver.stats.mapped_max);
+
+	kgsl_process_add_stats(private,
+		kgsl_memdesc_usermem_type(&entry->memdesc),
+		entry->memdesc.size);
+
+	trace_kgsl_mem_map(entry, fd);
+
+	kgsl_mem_entry_commit_process(entry);
+	return 0;
+
+unmap:
+	if (param->type == KGSL_USER_MEM_TYPE_DMABUF) {
+		kgsl_destroy_ion(entry->priv_data);
+		entry->memdesc.sgt = NULL;
+	}
+
+	kgsl_sharedmem_free(&entry->memdesc);
+
+out:
+	kfree(entry);
+	return ret;
+}
+
+static long _map_usermem_addr(struct kgsl_device *device,
+		struct kgsl_pagetable *pagetable, struct kgsl_mem_entry *entry,
+		unsigned long hostptr, size_t offset, size_t size)
+{
+	if (!MMU_FEATURE(&device->mmu, KGSL_MMU_PAGED))
+		return -EINVAL;
+
+	/* No CPU mapped buffer could ever be secure */
+	if (entry->memdesc.flags & KGSL_MEMFLAGS_SECURE)
+		return -EINVAL;
+
+	return kgsl_setup_useraddr(device, pagetable, entry, hostptr,
+		offset, size);
+}
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+static int _map_usermem_dma_buf(struct kgsl_device *device,
+		struct kgsl_pagetable *pagetable,
+		struct kgsl_mem_entry *entry,
+		unsigned int fd)
+{
+	int ret;
+	struct dma_buf *dmabuf;
+
+	/*
+	 * If content protection is not enabled and secure buffer
+	 * is requested to be mapped return error.
+	 */
+
+	if (entry->memdesc.flags & KGSL_MEMFLAGS_SECURE) {
+		if (!kgsl_mmu_is_secured(&device->mmu)) {
+			dev_WARN_ONCE(device->dev, 1,
+				"Secure buffer not supported");
+			return -EINVAL;
+		}
+
+		entry->memdesc.priv |= KGSL_MEMDESC_SECURE;
+	}
+
+	dmabuf = dma_buf_get(fd);
+	if (IS_ERR_OR_NULL(dmabuf)) {
+		ret = PTR_ERR(dmabuf);
+		return ret ? ret : -EINVAL;
+	}
+	ret = kgsl_setup_dma_buf(device, pagetable, entry, dmabuf);
+	if (ret)
+		dma_buf_put(dmabuf);
+	return ret;
+}
+#else
+static int _map_usermem_dma_buf(struct kgsl_device *device,
+		struct kgsl_pagetable *pagetable,
+		struct kgsl_mem_entry *entry,
+		unsigned int fd)
+{
+	return -EINVAL;
+}
+#endif
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+static int kgsl_setup_dma_buf(struct kgsl_device *device,
+				struct kgsl_pagetable *pagetable,
+				struct kgsl_mem_entry *entry,
+				struct dma_buf *dmabuf)
+{
+	int ret = 0;
+	struct scatterlist *s;
+	struct sg_table *sg_table;
+	struct dma_buf_attachment *attach = NULL;
+	struct kgsl_dma_buf_meta *meta;
+
+	meta = kzalloc(sizeof(*meta), GFP_KERNEL);
+	if (!meta)
+		return -ENOMEM;
+
+	attach = dma_buf_attach(dmabuf, device->dev);
+	if (IS_ERR_OR_NULL(attach)) {
+		ret = attach ? PTR_ERR(attach) : -EINVAL;
+		goto out;
+	}
+
+	meta->dmabuf = dmabuf;
+	meta->attach = attach;
+
+	attach->priv = entry;
+
+	entry->priv_data = meta;
+	entry->memdesc.pagetable = pagetable;
+	entry->memdesc.size = 0;
+	/* USE_CPU_MAP is not implemented for ION. */
+	entry->memdesc.flags &= ~((uint64_t) KGSL_MEMFLAGS_USE_CPU_MAP);
+	entry->memdesc.flags |= KGSL_MEMFLAGS_USERMEM_ION;
+
+	sg_table = dma_buf_map_attachment(attach, DMA_TO_DEVICE);
+
+	if (IS_ERR_OR_NULL(sg_table)) {
+		ret = PTR_ERR(sg_table);
+		goto out;
+	}
+
+	meta->table = sg_table;
+	entry->priv_data = meta;
+	entry->memdesc.sgt = sg_table;
+
+	/* Calculate the size of the memdesc from the sglist */
+	for (s = entry->memdesc.sgt->sgl; s != NULL; s = sg_next(s)) {
+		int priv = (entry->memdesc.priv & KGSL_MEMDESC_SECURE) ? 1 : 0;
+
+		/*
+		 * Check that each chunk of the sg table matches the secure
+		 * flag.
+		 */
+
+		if (PagePrivate(sg_page(s)) != priv) {
+			ret = -EPERM;
+			goto out;
+		}
+
+		entry->memdesc.size += (uint64_t) s->length;
+	}
+
+	entry->memdesc.size = PAGE_ALIGN(entry->memdesc.size);
+
+out:
+	if (ret) {
+		if (!IS_ERR_OR_NULL(attach))
+			dma_buf_detach(dmabuf, attach);
+
+		kfree(meta);
+	}
+
+	return ret;
+}
+#endif
+
+#ifdef CONFIG_DMA_SHARED_BUFFER
+void kgsl_get_egl_counts(struct kgsl_mem_entry *entry,
+		int *egl_surface_count, int *egl_image_count)
+{
+	struct kgsl_dma_buf_meta *meta = entry->priv_data;
+	struct dma_buf *dmabuf = meta->dmabuf;
+	struct dma_buf_attachment *mem_entry_buf_attachment = meta->attach;
+	struct device *buf_attachment_dev = mem_entry_buf_attachment->dev;
+	struct dma_buf_attachment *attachment = NULL;
+
+	mutex_lock(&dmabuf->lock);
+	list_for_each_entry(attachment, &dmabuf->attachments, node) {
+		struct kgsl_mem_entry *scan_mem_entry = NULL;
+
+		if (attachment->dev != buf_attachment_dev)
+			continue;
+
+		scan_mem_entry = attachment->priv;
+		if (!scan_mem_entry)
+			continue;
+
+		switch (kgsl_memdesc_get_memtype(&scan_mem_entry->memdesc)) {
+		case KGSL_MEMTYPE_EGL_SURFACE:
+			(*egl_surface_count)++;
+			break;
+		case KGSL_MEMTYPE_EGL_IMAGE:
+			(*egl_image_count)++;
+			break;
+		}
+	}
+	mutex_unlock(&dmabuf->lock);
+}
+#else
+void kgsl_get_egl_counts(struct kgsl_mem_entry *entry,
+		int *egl_surface_count, int *egl_image_count)
+{
+}
+#endif
+
+long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv,
+				     unsigned int cmd, void *data)
+{
+	int result = -EINVAL;
+	struct kgsl_map_user_mem *param = data;
+	struct kgsl_mem_entry *entry = NULL;
+	struct kgsl_process_private *private = dev_priv->process_priv;
+	struct kgsl_mmu *mmu = &dev_priv->device->mmu;
+	unsigned int memtype;
+
+	/*
+	 * If content protection is not enabled and secure buffer
+	 * is requested to be mapped return error.
+	 */
+
+	if (param->flags & KGSL_MEMFLAGS_SECURE) {
+		/* Log message and return if content protection isn't enabled */
+		if (!kgsl_mmu_is_secured(mmu)) {
+			dev_WARN_ONCE(dev_priv->device->dev, 1,
+				"Secure buffer not supported");
+			return -EOPNOTSUPP;
+		}
+
+		/* Can't use CPU map with secure buffers */
+		if (param->flags & KGSL_MEMFLAGS_USE_CPU_MAP)
+			return -EINVAL;
+	}
+
+	entry = kgsl_mem_entry_create();
+
+	if (entry == NULL)
+		return -ENOMEM;
+
+	/*
+	 * Convert from enum value to KGSL_MEM_ENTRY value, so that
+	 * we can use the latter consistently everywhere.
+	 */
+	memtype = param->memtype + 1;
+
+	/*
+	 * Mask off unknown flags from userspace. This way the caller can
+	 * check if a flag is supported by looking at the returned flags.
+	 * Note: CACHEMODE is ignored for this call. Caching should be
+	 * determined by type of allocation being mapped.
+	 */
+	param->flags &= KGSL_MEMFLAGS_GPUREADONLY
+			| KGSL_MEMTYPE_MASK
+			| KGSL_MEMALIGN_MASK
+			| KGSL_MEMFLAGS_USE_CPU_MAP
+			| KGSL_MEMFLAGS_SECURE;
+	entry->memdesc.flags = ((uint64_t) param->flags)
+		| KGSL_MEMFLAGS_FORCE_32BIT;
+
+	if (!kgsl_mmu_use_cpu_map(mmu))
+		entry->memdesc.flags &= ~((uint64_t) KGSL_MEMFLAGS_USE_CPU_MAP);
+
+	if (MMU_FEATURE(mmu, KGSL_MMU_NEED_GUARD_PAGE))
+		entry->memdesc.priv |= KGSL_MEMDESC_GUARD_PAGE;
+
+	if (param->flags & KGSL_MEMFLAGS_SECURE)
+		entry->memdesc.priv |= KGSL_MEMDESC_SECURE;
+
+	switch (memtype) {
+	case KGSL_MEM_ENTRY_USER:
+		result = _map_usermem_addr(dev_priv->device, private->pagetable,
+			entry, param->hostptr, param->offset, param->len);
+		break;
+	case KGSL_MEM_ENTRY_ION:
+		if (param->offset != 0)
+			result = -EINVAL;
+		else
+			result = _map_usermem_dma_buf(dev_priv->device,
+				private->pagetable, entry, param->fd);
+		break;
+	default:
+		result = -EOPNOTSUPP;
+		break;
+	}
+
+	if (result)
+		goto error;
+
+	if ((param->flags & KGSL_MEMFLAGS_SECURE) &&
+		(entry->memdesc.size & mmu->secure_align_mask)) {
+		result = -EINVAL;
+		goto error_attach;
+	}
+
+	if (entry->memdesc.size >= SZ_2M)
+		kgsl_memdesc_set_align(&entry->memdesc, ilog2(SZ_2M));
+	else if (entry->memdesc.size >= SZ_1M)
+		kgsl_memdesc_set_align(&entry->memdesc, ilog2(SZ_1M));
+	else if (entry->memdesc.size >= SZ_64K)
+		kgsl_memdesc_set_align(&entry->memdesc, ilog2(SZ_64K));
+
+	/* echo back flags */
+	param->flags = (unsigned int) entry->memdesc.flags;
+
+	result = kgsl_mem_entry_attach_process(dev_priv->device, private,
+		entry);
+	if (result)
+		goto error_attach;
+
+	/* Adjust the returned value for a non 4k aligned offset */
+	param->gpuaddr = (unsigned long)
+		entry->memdesc.gpuaddr + (param->offset & PAGE_MASK);
+
+	KGSL_STATS_ADD(param->len, &kgsl_driver.stats.mapped,
+		&kgsl_driver.stats.mapped_max);
+
+	kgsl_process_add_stats(private,
+			kgsl_memdesc_usermem_type(&entry->memdesc), param->len);
+
+	trace_kgsl_mem_map(entry, param->fd);
+
+	kgsl_mem_entry_commit_process(entry);
+	return result;
+
+error_attach:
+	switch (memtype) {
+	case KGSL_MEM_ENTRY_ION:
+		kgsl_destroy_ion(entry->priv_data);
+		entry->memdesc.sgt = NULL;
+		break;
+	default:
+		break;
+	}
+	kgsl_sharedmem_free(&entry->memdesc);
+error:
+	/* Clear gpuaddr here so userspace doesn't get any wrong ideas */
+	param->gpuaddr = 0;
+
+	kfree(entry);
+	return result;
+}
+
+static int _kgsl_gpumem_sync_cache(struct kgsl_mem_entry *entry,
+		uint64_t offset, uint64_t length, unsigned int op)
+{
+	int ret = 0;
+	int cacheop;
+	int mode;
+
+	/*
+	 * Flush is defined as (clean | invalidate).  If both bits are set, then
+	 * do a flush, otherwise check for the individual bits and clean or inv
+	 * as requested
+	 */
+
+	if ((op & KGSL_GPUMEM_CACHE_FLUSH) == KGSL_GPUMEM_CACHE_FLUSH)
+		cacheop = KGSL_CACHE_OP_FLUSH;
+	else if (op & KGSL_GPUMEM_CACHE_CLEAN)
+		cacheop = KGSL_CACHE_OP_CLEAN;
+	else if (op & KGSL_GPUMEM_CACHE_INV)
+		cacheop = KGSL_CACHE_OP_INV;
+	else {
+		ret = -EINVAL;
+		goto done;
+	}
+
+	if (!(op & KGSL_GPUMEM_CACHE_RANGE)) {
+		offset = 0;
+		length = entry->memdesc.size;
+	}
+
+	mode = kgsl_memdesc_get_cachemode(&entry->memdesc);
+	if (mode != KGSL_CACHEMODE_UNCACHED
+		&& mode != KGSL_CACHEMODE_WRITECOMBINE) {
+		trace_kgsl_mem_sync_cache(entry, offset, length, op);
+		ret = kgsl_cache_range_op(&entry->memdesc, offset,
+					length, cacheop);
+	}
+
+done:
+	return ret;
+}
+
+/* New cache sync function - supports both directions (clean and invalidate) */
+
+long kgsl_ioctl_gpumem_sync_cache(struct kgsl_device_private *dev_priv,
+	unsigned int cmd, void *data)
+{
+	struct kgsl_gpumem_sync_cache *param = data;
+	struct kgsl_process_private *private = dev_priv->process_priv;
+	struct kgsl_mem_entry *entry = NULL;
+	long ret;
+
+	if (param->id != 0)
+		entry = kgsl_sharedmem_find_id(private, param->id);
+	else if (param->gpuaddr != 0)
+		entry = kgsl_sharedmem_find(private, (uint64_t) param->gpuaddr);
+
+	if (entry == NULL)
+		return -EINVAL;
+
+	ret = _kgsl_gpumem_sync_cache(entry, (uint64_t) param->offset,
+					(uint64_t) param->length, param->op);
+	kgsl_mem_entry_put(entry);
+	return ret;
+}
+
+static int mem_id_cmp(const void *_a, const void *_b)
+{
+	const unsigned int *a = _a, *b = _b;
+
+	if (*a == *b)
+		return 0;
+	return (*a > *b) ? 1 : -1;
+}
+
+#ifdef CONFIG_ARM64
+/* Do not support full flush on ARM64 targets */
+static inline bool check_full_flush(size_t size, int op)
+{
+	return false;
+}
+#else
+/* Support full flush if the size is bigger than the threshold */
+static inline bool check_full_flush(size_t size, int op)
+{
+	/* If we exceed the breakeven point, flush the entire cache */
+	bool ret = (kgsl_driver.full_cache_threshold != 0) &&
+		(size >= kgsl_driver.full_cache_threshold) &&
+		(op == KGSL_GPUMEM_CACHE_FLUSH);
+	if (ret)
+		flush_cache_all();
+	return ret;
+}
+#endif
+
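+/*
+ * Sync a list of memory entries in one call: the id list is sorted to drop
+ * duplicates, entries that are not CPU cached are skipped and, where full
+ * flushes are supported, crossing the full cache threshold flushes the
+ * entire cache instead of syncing each entry individually.
+ */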
+long kgsl_ioctl_gpumem_sync_cache_bulk(struct kgsl_device_private *dev_priv,
+	unsigned int cmd, void *data)
+{
+	int i;
+	struct kgsl_gpumem_sync_cache_bulk *param = data;
+	struct kgsl_process_private *private = dev_priv->process_priv;
+	unsigned int id, last_id = 0, *id_list = NULL, actual_count = 0;
+	struct kgsl_mem_entry **entries = NULL;
+	long ret = 0;
+	uint64_t op_size = 0;
+	bool full_flush = false;
+
+	if (param->id_list == NULL || param->count == 0
+			|| param->count > (PAGE_SIZE / sizeof(unsigned int)))
+		return -EINVAL;
+
+	id_list = kcalloc(param->count, sizeof(unsigned int), GFP_KERNEL);
+	if (id_list == NULL)
+		return -ENOMEM;
+
+	entries = kcalloc(param->count, sizeof(*entries), GFP_KERNEL);
+	if (entries == NULL) {
+		ret = -ENOMEM;
+		goto end;
+	}
+
+	if (copy_from_user(id_list, param->id_list,
+				param->count * sizeof(unsigned int))) {
+		ret = -EFAULT;
+		goto end;
+	}
+	/* sort the ids so we can weed out duplicates */
+	sort(id_list, param->count, sizeof(*id_list), mem_id_cmp, NULL);
+
+	for (i = 0; i < param->count; i++) {
+		unsigned int cachemode;
+		struct kgsl_mem_entry *entry = NULL;
+
+		id = id_list[i];
+		/* skip 0 ids or duplicates */
+		if (id == last_id)
+			continue;
+
+		entry = kgsl_sharedmem_find_id(private, id);
+		if (entry == NULL)
+			continue;
+
+		/* skip uncached memory */
+		cachemode = kgsl_memdesc_get_cachemode(&entry->memdesc);
+		if (cachemode != KGSL_CACHEMODE_WRITETHROUGH &&
+		    cachemode != KGSL_CACHEMODE_WRITEBACK) {
+			kgsl_mem_entry_put(entry);
+			continue;
+		}
+
+		op_size += entry->memdesc.size;
+		entries[actual_count++] = entry;
+
+		full_flush = check_full_flush(op_size, param->op);
+		if (full_flush) {
+			trace_kgsl_mem_sync_full_cache(actual_count, op_size);
+			break;
+		}
+
+		last_id = id;
+	}
+
+	param->op &= ~KGSL_GPUMEM_CACHE_RANGE;
+
+	for (i = 0; i < actual_count; i++) {
+		if (!full_flush)
+			_kgsl_gpumem_sync_cache(entries[i], 0,
+						entries[i]->memdesc.size,
+						param->op);
+		kgsl_mem_entry_put(entries[i]);
+	}
+end:
+	kfree(entries);
+	kfree(id_list);
+	return ret;
+}
+
+/* Legacy cache function, does a flush (clean + invalidate) */
+
+long kgsl_ioctl_sharedmem_flush_cache(struct kgsl_device_private *dev_priv,
+				 unsigned int cmd, void *data)
+{
+	struct kgsl_sharedmem_free *param = data;
+	struct kgsl_process_private *private = dev_priv->process_priv;
+	struct kgsl_mem_entry *entry = NULL;
+	long ret;
+
+	entry = kgsl_sharedmem_find(private, (uint64_t) param->gpuaddr);
+	if (entry == NULL)
+		return -EINVAL;
+
+	ret = _kgsl_gpumem_sync_cache(entry, 0, entry->memdesc.size,
+					KGSL_GPUMEM_CACHE_FLUSH);
+	kgsl_mem_entry_put(entry);
+	return ret;
+}
+
+long kgsl_ioctl_gpuobj_sync(struct kgsl_device_private *dev_priv,
+		unsigned int cmd, void *data)
+{
+	struct kgsl_process_private *private = dev_priv->process_priv;
+	struct kgsl_gpuobj_sync *param = data;
+	struct kgsl_gpuobj_sync_obj *objs;
+	struct kgsl_mem_entry **entries;
+	long ret = 0;
+	bool full_flush = false;
+	uint64_t size = 0;
+	int i, count = 0;
+	void __user *ptr;
+
+	if (param->count == 0 || param->count > 128)
+		return -EINVAL;
+
+	objs = kcalloc(param->count, sizeof(*objs), GFP_KERNEL);
+	if (objs == NULL)
+		return -ENOMEM;
+
+	entries = kcalloc(param->count, sizeof(*entries), GFP_KERNEL);
+	if (entries == NULL) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ptr = to_user_ptr(param->objs);
+
+	for (i = 0; i < param->count; i++) {
+		ret = _copy_from_user(&objs[i], ptr, sizeof(*objs),
+			param->obj_len);
+		if (ret)
+			goto out;
+
+		entries[i] = kgsl_sharedmem_find_id(private, objs[i].id);
+
+		/* Not finding the ID is not a fatal failure - just skip it */
+		if (entries[i] == NULL)
+			continue;
+
+		count++;
+
+		if (!(objs[i].op & KGSL_GPUMEM_CACHE_RANGE))
+			size += entries[i]->memdesc.size;
+		else if (objs[i].offset < entries[i]->memdesc.size)
+			size += (entries[i]->memdesc.size - objs[i].offset);
+
+		full_flush = check_full_flush(size, objs[i].op);
+		if (full_flush)
+			break;
+
+		ptr += sizeof(*objs);
+	}
+
+	if (!full_flush) {
+		for (i = 0; !ret && i < param->count; i++)
+			if (entries[i])
+				ret = _kgsl_gpumem_sync_cache(entries[i],
+						objs[i].offset, objs[i].length,
+						objs[i].op);
+	}
+
+	for (i = 0; i < param->count; i++)
+		if (entries[i])
+			kgsl_mem_entry_put(entries[i]);
+
+out:
+	kfree(entries);
+	kfree(objs);
+
+	return ret;
+}
+
+#ifdef CONFIG_ARM64
+static uint64_t kgsl_filter_cachemode(uint64_t flags)
+{
+	/*
+	 * WRITETHROUGH is not supported in arm64, so we tell the user that we
+	 * use WRITEBACK which is the default caching policy.
+	 */
+	if ((flags & KGSL_CACHEMODE_MASK) >> KGSL_CACHEMODE_SHIFT ==
+					KGSL_CACHEMODE_WRITETHROUGH) {
+		flags &= ~((uint64_t) KGSL_CACHEMODE_MASK);
+		flags |= (KGSL_CACHEMODE_WRITEBACK << KGSL_CACHEMODE_SHIFT) &
+							KGSL_CACHEMODE_MASK;
+	}
+	return flags;
+}
+#else
+static uint64_t kgsl_filter_cachemode(uint64_t flags)
+{
+	return flags;
+}
+#endif
+
+/* The largest allowable alignment for a GPU object is 32MB */
+#define KGSL_MAX_ALIGN (32 * SZ_1M)
+
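+/*
+ * Common allocation path for the gpumem/gpuobj alloc ioctls: sanitize the
+ * user supplied flags, cap the requested alignment at KGSL_MAX_ALIGN, limit
+ * the size to 4G and attach the resulting entry to the calling process.
+ */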
+static struct kgsl_mem_entry *gpumem_alloc_entry(
+		struct kgsl_device_private *dev_priv,
+		uint64_t size, uint64_t flags)
+{
+	int ret;
+	struct kgsl_process_private *private = dev_priv->process_priv;
+	struct kgsl_mem_entry *entry;
+	unsigned int align;
+
+	flags &= KGSL_MEMFLAGS_GPUREADONLY
+		| KGSL_CACHEMODE_MASK
+		| KGSL_MEMTYPE_MASK
+		| KGSL_MEMALIGN_MASK
+		| KGSL_MEMFLAGS_USE_CPU_MAP
+		| KGSL_MEMFLAGS_SECURE
+		| KGSL_MEMFLAGS_FORCE_32BIT;
+
+	/* Turn off SVM if the system doesn't support it */
+	if (!kgsl_mmu_use_cpu_map(&dev_priv->device->mmu))
+		flags &= ~((uint64_t) KGSL_MEMFLAGS_USE_CPU_MAP);
+
+	/* Return not supported error if secure memory isn't enabled */
+	if (!kgsl_mmu_is_secured(&dev_priv->device->mmu) &&
+			(flags & KGSL_MEMFLAGS_SECURE)) {
+		dev_WARN_ONCE(dev_priv->device->dev, 1,
+				"Secure memory not supported");
+		return ERR_PTR(-EOPNOTSUPP);
+	}
+
+	/* Secure memory disables advanced addressing modes */
+	if (flags & KGSL_MEMFLAGS_SECURE)
+		flags &= ~((uint64_t) KGSL_MEMFLAGS_USE_CPU_MAP);
+
+	/* Cap the alignment bits to the highest number we can handle */
+	align = MEMFLAGS(flags, KGSL_MEMALIGN_MASK, KGSL_MEMALIGN_SHIFT);
+	if (align >= ilog2(KGSL_MAX_ALIGN)) {
+		KGSL_CORE_ERR("Alignment too large; restricting to %dK\n",
+			KGSL_MAX_ALIGN >> 10);
+
+		flags &= ~((uint64_t) KGSL_MEMALIGN_MASK);
+		flags |= (ilog2(KGSL_MAX_ALIGN) << KGSL_MEMALIGN_SHIFT) &
+			KGSL_MEMALIGN_MASK;
+	}
+
+	/* For now only allow allocations up to 4G */
+	if (size == 0 || size > UINT_MAX)
+		return ERR_PTR(-EINVAL);
+
+	flags = kgsl_filter_cachemode(flags);
+
+	entry = kgsl_mem_entry_create();
+	if (entry == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	if (MMU_FEATURE(&dev_priv->device->mmu, KGSL_MMU_NEED_GUARD_PAGE))
+		entry->memdesc.priv |= KGSL_MEMDESC_GUARD_PAGE;
+
+	if (flags & KGSL_MEMFLAGS_SECURE)
+		entry->memdesc.priv |= KGSL_MEMDESC_SECURE;
+
+	ret = kgsl_allocate_user(dev_priv->device, &entry->memdesc,
+		size, flags);
+	if (ret != 0)
+		goto err;
+
+	ret = kgsl_mem_entry_attach_process(dev_priv->device, private, entry);
+	if (ret != 0) {
+		kgsl_sharedmem_free(&entry->memdesc);
+		goto err;
+	}
+
+	kgsl_process_add_stats(private,
+			kgsl_memdesc_usermem_type(&entry->memdesc),
+			entry->memdesc.size);
+	trace_kgsl_mem_alloc(entry);
+
+	kgsl_mem_entry_commit_process(entry);
+	return entry;
+err:
+	kfree(entry);
+	return ERR_PTR(ret);
+}
+
+static void copy_metadata(struct kgsl_mem_entry *entry, uint64_t metadata,
+		unsigned int len)
+{
+	unsigned int i, size;
+
+	if (len == 0)
+		return;
+
+	size = min_t(unsigned int, len, sizeof(entry->metadata) - 1);
+
+	if (copy_from_user(entry->metadata, to_user_ptr(metadata), size)) {
+		memset(entry->metadata, 0, sizeof(entry->metadata));
+		return;
+	}
+
+	/* Clean up non printable characters in the string */
+	for (i = 0; i < size && entry->metadata[i] != 0; i++) {
+		if (!isprint(entry->metadata[i]))
+			entry->metadata[i] = '?';
+	}
+}
+
+long kgsl_ioctl_gpuobj_alloc(struct kgsl_device_private *dev_priv,
+		unsigned int cmd, void *data)
+{
+	struct kgsl_gpuobj_alloc *param = data;
+	struct kgsl_mem_entry *entry;
+
+	entry = gpumem_alloc_entry(dev_priv, param->size, param->flags);
+
+	if (IS_ERR(entry))
+		return PTR_ERR(entry);
+
+	copy_metadata(entry, param->metadata, param->metadata_len);
+
+	param->size = entry->memdesc.size;
+	param->flags = entry->memdesc.flags;
+	param->mmapsize = kgsl_memdesc_footprint(&entry->memdesc);
+	param->id = entry->id;
+
+	return 0;
+}
+
+long kgsl_ioctl_gpumem_alloc(struct kgsl_device_private *dev_priv,
+		unsigned int cmd, void *data)
+{
+	struct kgsl_gpumem_alloc *param = data;
+	struct kgsl_mem_entry *entry;
+	uint64_t flags = param->flags;
+
+	/* The legacy function doesn't support these advanced features */
+	flags &= ~((uint64_t) KGSL_MEMFLAGS_USE_CPU_MAP);
+	flags |= KGSL_MEMFLAGS_FORCE_32BIT;
+
+	entry = gpumem_alloc_entry(dev_priv, (uint64_t) param->size, flags);
+
+	if (IS_ERR(entry))
+		return PTR_ERR(entry);
+
+	param->gpuaddr = (unsigned long) entry->memdesc.gpuaddr;
+	param->size = (size_t) entry->memdesc.size;
+	param->flags = (unsigned int) entry->memdesc.flags;
+
+	return 0;
+}
+
+long kgsl_ioctl_gpumem_alloc_id(struct kgsl_device_private *dev_priv,
+			unsigned int cmd, void *data)
+{
+	struct kgsl_gpumem_alloc_id *param = data;
+	struct kgsl_mem_entry *entry;
+	uint64_t flags = param->flags;
+
+	flags |= KGSL_MEMFLAGS_FORCE_32BIT;
+
+	entry = gpumem_alloc_entry(dev_priv, (uint64_t) param->size, flags);
+
+	if (IS_ERR(entry))
+		return PTR_ERR(entry);
+
+	param->id = entry->id;
+	param->flags = (unsigned int) entry->memdesc.flags;
+	param->size = (size_t) entry->memdesc.size;
+	param->mmapsize = (size_t) kgsl_memdesc_footprint(&entry->memdesc);
+	param->gpuaddr = (unsigned long) entry->memdesc.gpuaddr;
+
+	return 0;
+}
+
+long kgsl_ioctl_gpumem_get_info(struct kgsl_device_private *dev_priv,
+			unsigned int cmd, void *data)
+{
+	struct kgsl_process_private *private = dev_priv->process_priv;
+	struct kgsl_gpumem_get_info *param = data;
+	struct kgsl_mem_entry *entry = NULL;
+	int result = 0;
+
+	if (param->id != 0)
+		entry = kgsl_sharedmem_find_id(private, param->id);
+	else if (param->gpuaddr != 0)
+		entry = kgsl_sharedmem_find(private, (uint64_t) param->gpuaddr);
+
+	if (entry == NULL)
+		return -EINVAL;
+
+	/*
+	 * If any of the 64 bit address / sizes would end up being
+	 * truncated, return -ERANGE.  That will signal the user that they
+	 * should use a more modern API
+	 */
+	if (entry->memdesc.gpuaddr > ULONG_MAX)
+		result = -ERANGE;
+
+	param->gpuaddr = (unsigned long) entry->memdesc.gpuaddr;
+	param->id = entry->id;
+	param->flags = (unsigned int) entry->memdesc.flags;
+	param->size = (size_t) entry->memdesc.size;
+	param->mmapsize = (size_t) kgsl_memdesc_footprint(&entry->memdesc);
+	param->useraddr = entry->memdesc.useraddr;
+
+	kgsl_mem_entry_put(entry);
+	return result;
+}
+
+static inline int _sparse_alloc_param_sanity_check(uint64_t size,
+		uint64_t pagesize)
+{
+	if (size == 0 || pagesize == 0)
+		return -EINVAL;
+
+	if (pagesize != PAGE_SIZE && pagesize != SZ_64K)
+		return -EINVAL;
+
+	if (pagesize > size || !IS_ALIGNED(size, pagesize))
+		return -EINVAL;
+
+	return 0;
+}
+
+long kgsl_ioctl_sparse_phys_alloc(struct kgsl_device_private *dev_priv,
+	unsigned int cmd, void *data)
+{
+	struct kgsl_process_private *process = dev_priv->process_priv;
+	struct kgsl_sparse_phys_alloc *param = data;
+	struct kgsl_mem_entry *entry;
+	int ret;
+	int id;
+
+	ret = _sparse_alloc_param_sanity_check(param->size, param->pagesize);
+	if (ret)
+		return ret;
+
+	entry = kgsl_mem_entry_create();
+	if (entry == NULL)
+		return -ENOMEM;
+
+	ret = kgsl_process_private_get(process);
+	if (!ret) {
+		ret = -EBADF;
+		goto err_free_entry;
+	}
+
+	idr_preload(GFP_KERNEL);
+	spin_lock(&process->mem_lock);
+	/* Allocate the ID but don't attach the pointer just yet */
+	id = idr_alloc(&process->mem_idr, NULL, 1, 0, GFP_NOWAIT);
+	spin_unlock(&process->mem_lock);
+	idr_preload_end();
+
+	if (id < 0) {
+		ret = id;
+		goto err_put_proc_priv;
+	}
+
+	entry->id = id;
+	entry->priv = process;
+
+	entry->memdesc.flags = KGSL_MEMFLAGS_SPARSE_PHYS;
+	kgsl_memdesc_set_align(&entry->memdesc, ilog2(param->pagesize));
+
+	ret = kgsl_allocate_user(dev_priv->device, &entry->memdesc,
+			param->size, entry->memdesc.flags);
+	if (ret)
+		goto err_remove_idr;
+
+	/* Sanity check to verify we got correct pagesize */
+	if (param->pagesize != PAGE_SIZE && entry->memdesc.sgt != NULL) {
+		struct scatterlist *s;
+		int i;
+
+		for_each_sg(entry->memdesc.sgt->sgl, s,
+				entry->memdesc.sgt->nents, i) {
+			if (!IS_ALIGNED(s->length, param->pagesize))
+				goto err_invalid_pages;
+		}
+	}
+
+	param->id = entry->id;
+	param->flags = entry->memdesc.flags;
+
+	trace_sparse_phys_alloc(entry->id, param->size, param->pagesize);
+	kgsl_mem_entry_commit_process(entry);
+
+	return 0;
+
+err_invalid_pages:
+	kgsl_sharedmem_free(&entry->memdesc);
+err_remove_idr:
+	spin_lock(&process->mem_lock);
+	idr_remove(&process->mem_idr, entry->id);
+	spin_unlock(&process->mem_lock);
+err_put_proc_priv:
+	kgsl_process_private_put(process);
+err_free_entry:
+	kfree(entry);
+
+	return ret;
+}
+
+long kgsl_ioctl_sparse_phys_free(struct kgsl_device_private *dev_priv,
+	unsigned int cmd, void *data)
+{
+	struct kgsl_process_private *process = dev_priv->process_priv;
+	struct kgsl_sparse_phys_free *param = data;
+	struct kgsl_mem_entry *entry;
+
+	entry = kgsl_sharedmem_find_id_flags(process, param->id,
+			KGSL_MEMFLAGS_SPARSE_PHYS);
+	if (entry == NULL)
+		return -EINVAL;
+
+	if (entry->memdesc.cur_bindings != 0) {
+		kgsl_mem_entry_put(entry);
+		return -EINVAL;
+	}
+
+	trace_sparse_phys_free(entry->id);
+
+	/* One put for find_id(), one put for the kgsl_mem_entry_create() */
+	kgsl_mem_entry_put(entry);
+	kgsl_mem_entry_put(entry);
+
+	return 0;
+}
+
+long kgsl_ioctl_sparse_virt_alloc(struct kgsl_device_private *dev_priv,
+	unsigned int cmd, void *data)
+{
+	struct kgsl_process_private *private = dev_priv->process_priv;
+	struct kgsl_sparse_virt_alloc *param = data;
+	struct kgsl_mem_entry *entry;
+	int ret;
+
+	ret = _sparse_alloc_param_sanity_check(param->size, param->pagesize);
+	if (ret)
+		return ret;
+
+	entry = kgsl_mem_entry_create();
+	if (entry == NULL)
+		return -ENOMEM;
+
+	entry->memdesc.flags = KGSL_MEMFLAGS_SPARSE_VIRT;
+	entry->memdesc.size = param->size;
+	entry->memdesc.cur_bindings = 0;
+	kgsl_memdesc_set_align(&entry->memdesc, ilog2(param->pagesize));
+
+	spin_lock_init(&entry->bind_lock);
+	entry->bind_tree = RB_ROOT;
+
+	ret = kgsl_mem_entry_attach_process(dev_priv->device, private, entry);
+	if (ret) {
+		kfree(entry);
+		return ret;
+	}
+
+	param->id = entry->id;
+	param->gpuaddr = entry->memdesc.gpuaddr;
+	param->flags = entry->memdesc.flags;
+
+	trace_sparse_virt_alloc(entry->id, param->size, param->pagesize);
+	kgsl_mem_entry_commit_process(entry);
+
+	return 0;
+}
+
+long kgsl_ioctl_sparse_virt_free(struct kgsl_device_private *dev_priv,
+	unsigned int cmd, void *data)
+{
+	struct kgsl_process_private *process = dev_priv->process_priv;
+	struct kgsl_sparse_virt_free *param = data;
+	struct kgsl_mem_entry *entry = NULL;
+
+	entry = kgsl_sharedmem_find_id_flags(process, param->id,
+			KGSL_MEMFLAGS_SPARSE_VIRT);
+	if (entry == NULL)
+		return -EINVAL;
+
+	if (entry->bind_tree.rb_node != NULL) {
+		kgsl_mem_entry_put(entry);
+		return -EINVAL;
+	}
+
+	trace_sparse_virt_free(entry->id);
+
+	/* One put for find_id(), one put for the kgsl_mem_entry_create() */
+	kgsl_mem_entry_put(entry);
+	kgsl_mem_entry_put(entry);
+
+	return 0;
+}
+
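+/*
+ * Sparse bindings for a virtual allocation are tracked in a red-black tree
+ * ordered by virtual offset; each node records the physical memdesc, the
+ * physical offset and the size of the bound range.
+ */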
+static int _sparse_add_to_bind_tree(struct kgsl_mem_entry *entry,
+		uint64_t v_offset,
+		struct kgsl_memdesc *memdesc,
+		uint64_t p_offset,
+		uint64_t size,
+		uint64_t flags)
+{
+	struct sparse_bind_object *new;
+	struct rb_node **node, *parent = NULL;
+
+	new = kzalloc(sizeof(*new), GFP_KERNEL);
+	if (new == NULL)
+		return -ENOMEM;
+
+	new->v_off = v_offset;
+	new->p_off = p_offset;
+	new->p_memdesc = memdesc;
+	new->size = size;
+	new->flags = flags;
+
+	node = &entry->bind_tree.rb_node;
+
+	while (*node != NULL) {
+		struct sparse_bind_object *this;
+
+		parent = *node;
+		this = rb_entry(parent, struct sparse_bind_object, node);
+
+		if (new->v_off < this->v_off)
+			node = &parent->rb_left;
+		else if (new->v_off > this->v_off)
+			node = &parent->rb_right;
+	}
+
+	rb_link_node(&new->node, parent, node);
+	rb_insert_color(&new->node, &entry->bind_tree);
+
+	return 0;
+}
+
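+/*
+ * Remove a range from a bind object: depending on how the range overlaps the
+ * node this either drops the node entirely, trims its front or back, or
+ * splits it into two nodes around the removed range.
+ */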
+static int _sparse_rm_from_bind_tree(struct kgsl_mem_entry *entry,
+		struct sparse_bind_object *obj,
+		uint64_t v_offset, uint64_t size)
+{
+	spin_lock(&entry->bind_lock);
+	if (v_offset == obj->v_off && size >= obj->size) {
+		/*
+		 * We are all encompassing, remove the entry and free
+		 * things up
+		 */
+		rb_erase(&obj->node, &entry->bind_tree);
+		kfree(obj);
+	} else if (v_offset == obj->v_off) {
+		/*
+		 * We are the front of the node, adjust the front of
+		 * the node
+		 */
+		obj->v_off += size;
+		obj->p_off += size;
+		obj->size -= size;
+	} else if ((v_offset + size) == (obj->v_off + obj->size)) {
+		/*
+		 * We are at the end of the obj, adjust the beginning
+		 * points
+		 */
+		obj->size -= size;
+	} else {
+		/*
+		 * We are in the middle of a node, split it up and
+		 * create a new mini node. Adjust this node's bounds
+		 * and add the new node to the list.
+		 */
+		uint64_t tmp_size = obj->size;
+		int ret;
+
+		obj->size = v_offset - obj->v_off;
+
+		spin_unlock(&entry->bind_lock);
+		ret = _sparse_add_to_bind_tree(entry, v_offset + size,
+				obj->p_memdesc,
+				obj->p_off + (v_offset - obj->v_off) + size,
+				tmp_size - (v_offset - obj->v_off) - size,
+				obj->flags);
+
+		return ret;
+	}
+
+	spin_unlock(&entry->bind_lock);
+
+	return 0;
+}
+
+static struct sparse_bind_object *_find_containing_bind_obj(
+		struct kgsl_mem_entry *entry,
+		uint64_t offset, uint64_t size)
+{
+	struct sparse_bind_object *obj = NULL;
+	struct rb_node *node = entry->bind_tree.rb_node;
+
+	spin_lock(&entry->bind_lock);
+
+	while (node != NULL) {
+		obj = rb_entry(node, struct sparse_bind_object, node);
+
+		if (offset == obj->v_off) {
+			break;
+		} else if (offset < obj->v_off) {
+			if (offset + size > obj->v_off)
+				break;
+			node = node->rb_left;
+			obj = NULL;
+		} else if (offset > obj->v_off) {
+			if (offset < obj->v_off + obj->size)
+				break;
+			node = node->rb_right;
+			obj = NULL;
+		}
+	}
+
+	spin_unlock(&entry->bind_lock);
+
+	return obj;
+}
+
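+/*
+ * Unbind a range from the virtual allocation: drop the physical mapping,
+ * replace it with the dummy sparse mapping and update the bind tree and the
+ * mapped memory statistics.
+ */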
+static int _sparse_unbind(struct kgsl_mem_entry *entry,
+		struct sparse_bind_object *bind_obj,
+		uint64_t offset, uint64_t size)
+{
+	struct kgsl_memdesc *memdesc = bind_obj->p_memdesc;
+	struct kgsl_pagetable *pt = memdesc->pagetable;
+	int ret;
+
+	if (memdesc->cur_bindings < (size / PAGE_SIZE))
+		return -EINVAL;
+
+	memdesc->cur_bindings -= size / PAGE_SIZE;
+
+	ret = kgsl_mmu_unmap_offset(pt, memdesc,
+			entry->memdesc.gpuaddr, offset, size);
+	if (ret)
+		return ret;
+
+	ret = kgsl_mmu_sparse_dummy_map(pt, &entry->memdesc, offset, size);
+	if (ret)
+		return ret;
+
+	ret = _sparse_rm_from_bind_tree(entry, bind_obj, offset, size);
+	if (ret == 0) {
+		atomic_long_sub(size, &kgsl_driver.stats.mapped);
+		trace_sparse_unbind(entry->id, offset, size);
+	}
+
+	return ret;
+}
+
+static long sparse_unbind_range(struct kgsl_sparse_binding_object *obj,
+	struct kgsl_mem_entry *virt_entry)
+{
+	struct sparse_bind_object *bind_obj;
+	int ret = 0;
+	uint64_t size = obj->size;
+	uint64_t tmp_size = obj->size;
+	uint64_t offset = obj->virtoffset;
+
+	while (size > 0 && ret == 0) {
+		tmp_size = size;
+		bind_obj = _find_containing_bind_obj(virt_entry, offset, size);
+		if (bind_obj == NULL)
+			return 0;
+
+		if (bind_obj->v_off > offset) {
+			tmp_size = size - (bind_obj->v_off - offset);
+			if (tmp_size > bind_obj->size)
+				tmp_size = bind_obj->size;
+			offset = bind_obj->v_off;
+		} else if (bind_obj->v_off < offset) {
+			uint64_t diff = offset - bind_obj->v_off;
+
+			if (diff + size > bind_obj->size)
+				tmp_size = bind_obj->size - diff;
+		} else {
+			if (tmp_size > bind_obj->size)
+				tmp_size = bind_obj->size;
+		}
+
+		ret = _sparse_unbind(virt_entry, bind_obj, offset, tmp_size);
+		if (ret == 0) {
+			offset += tmp_size;
+			size -= tmp_size;
+		}
+	}
+
+	return ret;
+}
+
+static inline bool _is_phys_bindable(struct kgsl_mem_entry *phys_entry,
+		uint64_t offset, uint64_t size, uint64_t flags)
+{
+	struct kgsl_memdesc *memdesc = &phys_entry->memdesc;
+
+	if (!IS_ALIGNED(offset | size, kgsl_memdesc_get_pagesize(memdesc)))
+		return false;
+
+	if (!(flags & KGSL_SPARSE_BIND_MULTIPLE_TO_PHYS) &&
+			offset + size > memdesc->size)
+		return false;
+
+	return true;
+}
+
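+/*
+ * Bind a range of a physical sparse allocation into a virtual sparse
+ * allocation: any existing mapping in the range is cleared, the physical
+ * pages are mapped at the requested virtual offset and the binding is
+ * recorded in the bind tree.
+ */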
+static int _sparse_bind(struct kgsl_process_private *process,
+		struct kgsl_mem_entry *virt_entry, uint64_t v_offset,
+		struct kgsl_mem_entry *phys_entry, uint64_t p_offset,
+		uint64_t size, uint64_t flags)
+{
+	int ret;
+	struct kgsl_pagetable *pagetable;
+	struct kgsl_memdesc *memdesc = &phys_entry->memdesc;
+
+	/* The physical entry must not already be mapped on the GPU or CPU */
+	if (memdesc->gpuaddr)
+		return -EINVAL;
+
+	if (memdesc->useraddr != 0)
+		return -EINVAL;
+
+	pagetable = memdesc->pagetable;
+
+	/* Clear out any mappings */
+	ret = kgsl_mmu_unmap_offset(pagetable, &virt_entry->memdesc,
+			virt_entry->memdesc.gpuaddr, v_offset, size);
+	if (ret)
+		return ret;
+
+	ret = kgsl_mmu_map_offset(pagetable, virt_entry->memdesc.gpuaddr,
+			v_offset, memdesc, p_offset, size, flags);
+	if (ret) {
+		/* Try to clean up, but not the end of the world */
+		kgsl_mmu_sparse_dummy_map(pagetable, &virt_entry->memdesc,
+				v_offset, size);
+		return ret;
+	}
+
+	ret = _sparse_add_to_bind_tree(virt_entry, v_offset, memdesc,
+			p_offset, size, flags);
+	if (ret == 0)
+		memdesc->cur_bindings += size / PAGE_SIZE;
+
+	return ret;
+}
+
+static long sparse_bind_range(struct kgsl_process_private *private,
+		struct kgsl_sparse_binding_object *obj,
+		struct kgsl_mem_entry *virt_entry)
+{
+	struct kgsl_mem_entry *phys_entry;
+	int ret;
+
+	phys_entry = kgsl_sharedmem_find_id_flags(private, obj->id,
+			KGSL_MEMFLAGS_SPARSE_PHYS);
+	if (phys_entry == NULL)
+		return -EINVAL;
+
+	if (!_is_phys_bindable(phys_entry, obj->physoffset, obj->size,
+				obj->flags)) {
+		kgsl_mem_entry_put(phys_entry);
+		return -EINVAL;
+	}
+
+	if (kgsl_memdesc_get_align(&virt_entry->memdesc) !=
+			kgsl_memdesc_get_align(&phys_entry->memdesc)) {
+		kgsl_mem_entry_put(phys_entry);
+		return -EINVAL;
+	}
+
+	ret = sparse_unbind_range(obj, virt_entry);
+	if (ret) {
+		kgsl_mem_entry_put(phys_entry);
+		return -EINVAL;
+	}
+
+	ret = _sparse_bind(private, virt_entry, obj->virtoffset,
+			phys_entry, obj->physoffset, obj->size,
+			obj->flags & KGSL_SPARSE_BIND_MULTIPLE_TO_PHYS);
+	if (ret == 0) {
+		KGSL_STATS_ADD(obj->size, &kgsl_driver.stats.mapped,
+				&kgsl_driver.stats.mapped_max);
+
+		trace_sparse_bind(virt_entry->id, obj->virtoffset,
+				phys_entry->id, obj->physoffset,
+				obj->size, obj->flags);
+	}
+
+	kgsl_mem_entry_put(phys_entry);
+
+	return ret;
+}
+
+long kgsl_ioctl_sparse_bind(struct kgsl_device_private *dev_priv,
+		unsigned int cmd, void *data)
+{
+	struct kgsl_process_private *private = dev_priv->process_priv;
+	struct kgsl_sparse_bind *param = data;
+	struct kgsl_sparse_binding_object obj;
+	struct kgsl_mem_entry *virt_entry;
+	int pg_sz;
+	void __user *ptr;
+	int ret = 0;
+	int i = 0;
+
+	ptr = (void __user *) (uintptr_t) param->list;
+
+	if (param->size > sizeof(struct kgsl_sparse_binding_object) ||
+		param->count == 0 || ptr == NULL)
+		return -EINVAL;
+
+	virt_entry = kgsl_sharedmem_find_id_flags(private, param->id,
+			KGSL_MEMFLAGS_SPARSE_VIRT);
+	if (virt_entry == NULL)
+		return -EINVAL;
+
+	pg_sz = kgsl_memdesc_get_pagesize(&virt_entry->memdesc);
+
+	for (i = 0; i < param->count; i++) {
+		memset(&obj, 0, sizeof(obj));
+		ret = _copy_from_user(&obj, ptr, sizeof(obj), param->size);
+		if (ret)
+			break;
+
+		/* Sanity check initial range */
+		if (obj.size == 0 ||
+			obj.virtoffset + obj.size > virt_entry->memdesc.size ||
+			!(IS_ALIGNED(obj.virtoffset | obj.size, pg_sz))) {
+			ret = -EINVAL;
+			break;
+		}
+
+		if (obj.flags & KGSL_SPARSE_BIND)
+			ret = sparse_bind_range(private, &obj, virt_entry);
+		else if (obj.flags & KGSL_SPARSE_UNBIND)
+			ret = sparse_unbind_range(&obj, virt_entry);
+		else
+			ret = -EINVAL;
+		if (ret)
+			break;
+
+		ptr += sizeof(obj);
+	}
+
+	kgsl_mem_entry_put(virt_entry);
+
+	return ret;
+}
+
+long kgsl_ioctl_gpuobj_info(struct kgsl_device_private *dev_priv,
+		unsigned int cmd, void *data)
+{
+	struct kgsl_process_private *private = dev_priv->process_priv;
+	struct kgsl_gpuobj_info *param = data;
+	struct kgsl_mem_entry *entry;
+
+	if (param->id == 0)
+		return -EINVAL;
+
+	entry = kgsl_sharedmem_find_id(private, param->id);
+	if (entry == NULL)
+		return -EINVAL;
+
+	param->id = entry->id;
+	param->gpuaddr = entry->memdesc.gpuaddr;
+	param->flags = entry->memdesc.flags;
+	param->size = entry->memdesc.size;
+	param->va_len = kgsl_memdesc_footprint(&entry->memdesc);
+	param->va_addr = (uint64_t) entry->memdesc.useraddr;
+
+	kgsl_mem_entry_put(entry);
+	return 0;
+}
+
+long kgsl_ioctl_gpuobj_set_info(struct kgsl_device_private *dev_priv,
+		unsigned int cmd, void *data)
+{
+	struct kgsl_process_private *private = dev_priv->process_priv;
+	struct kgsl_gpuobj_set_info *param = data;
+	struct kgsl_mem_entry *entry;
+
+	if (param->id == 0)
+		return -EINVAL;
+
+	entry = kgsl_sharedmem_find_id(private, param->id);
+	if (entry == NULL)
+		return -EINVAL;
+
+	if (param->flags & KGSL_GPUOBJ_SET_INFO_METADATA)
+		copy_metadata(entry, param->metadata, param->metadata_len);
+
+	if (param->flags & KGSL_GPUOBJ_SET_INFO_TYPE) {
+		entry->memdesc.flags &= ~((uint64_t) KGSL_MEMTYPE_MASK);
+		entry->memdesc.flags |= param->type << KGSL_MEMTYPE_SHIFT;
+	}
+
+	kgsl_mem_entry_put(entry);
+	return 0;
+}
+
+/**
+ * kgsl_ioctl_timestamp_event - Register a new timestamp event from userspace
+ * @dev_priv - pointer to the private device structure
+ * @cmd - the ioctl cmd passed from kgsl_ioctl
+ * @data - the user data buffer from kgsl_ioctl
+ * @returns 0 on success or error code on failure
+ */
+
+long kgsl_ioctl_timestamp_event(struct kgsl_device_private *dev_priv,
+		unsigned int cmd, void *data)
+{
+	struct kgsl_timestamp_event *param = data;
+	int ret;
+
+	switch (param->type) {
+	case KGSL_TIMESTAMP_EVENT_FENCE:
+		ret = kgsl_add_fence_event(dev_priv->device,
+			param->context_id, param->timestamp, param->priv,
+			param->len, dev_priv);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+
+	return ret;
+}
+
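+/*
+ * Map the device memstore into userspace. The mapping must be read-only,
+ * must cover the entire memstore and is made write-combined.
+ */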
+static int
+kgsl_mmap_memstore(struct kgsl_device *device, struct vm_area_struct *vma)
+{
+	struct kgsl_memdesc *memdesc = &device->memstore;
+	int result;
+	unsigned int vma_size = vma->vm_end - vma->vm_start;
+
+	/* The memstore can only be mapped as read only */
+
+	if (vma->vm_flags & VM_WRITE)
+		return -EPERM;
+
+	if (memdesc->size  !=  vma_size) {
+		KGSL_MEM_ERR(device, "memstore bad size: %d should be %llu\n",
+			     vma_size, memdesc->size);
+		return -EINVAL;
+	}
+
+	vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+
+	result = remap_pfn_range(vma, vma->vm_start,
+				device->memstore.physaddr >> PAGE_SHIFT,
+				 vma_size, vma->vm_page_prot);
+	if (result != 0)
+		KGSL_MEM_ERR(device, "remap_pfn_range failed: %d\n",
+			     result);
+
+	return result;
+}
+
+/*
+ * kgsl_gpumem_vm_open is called whenever a vma region is copied or split.
+ * Increase the refcount to make sure that the accounting stays correct
+ */
+
+static void kgsl_gpumem_vm_open(struct vm_area_struct *vma)
+{
+	struct kgsl_mem_entry *entry = vma->vm_private_data;
+
+	if (kgsl_mem_entry_get(entry) == 0)
+		vma->vm_private_data = NULL;
+}
+
+static int
+kgsl_gpumem_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+{
+	struct kgsl_mem_entry *entry = vma->vm_private_data;
+
+	if (!entry)
+		return VM_FAULT_SIGBUS;
+	if (!entry->memdesc.ops || !entry->memdesc.ops->vmfault)
+		return VM_FAULT_SIGBUS;
+
+	return entry->memdesc.ops->vmfault(&entry->memdesc, vma, vmf);
+}
+
+static void
+kgsl_gpumem_vm_close(struct vm_area_struct *vma)
+{
+	struct kgsl_mem_entry *entry  = vma->vm_private_data;
+
+	if (!entry)
+		return;
+
+	entry->memdesc.useraddr = 0;
+	kgsl_mem_entry_put(entry);
+}
+
+static const struct vm_operations_struct kgsl_gpumem_vm_ops = {
+	.open  = kgsl_gpumem_vm_open,
+	.fault = kgsl_gpumem_vm_fault,
+	.close = kgsl_gpumem_vm_close,
+};
+
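+/*
+ * get_mmap_entry() - Look up the memory entry backing an mmap request
+ * @private: process that owns the memory
+ * @out_entry: returns the matching entry with a reference held
+ * @pgoff: mmap page offset, interpreted first as an entry id and then as a
+ * GPU address shifted down by PAGE_SHIFT
+ * @len: requested mapping length, checked against the entry size/footprint
+ *
+ * On success the caller must release the reference with kgsl_mem_entry_put().
+ */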
+static int
+get_mmap_entry(struct kgsl_process_private *private,
+		struct kgsl_mem_entry **out_entry, unsigned long pgoff,
+		unsigned long len)
+{
+	int ret = 0;
+	struct kgsl_mem_entry *entry;
+
+	entry = kgsl_sharedmem_find_id(private, pgoff);
+	if (entry == NULL)
+		entry = kgsl_sharedmem_find(private, pgoff << PAGE_SHIFT);
+
+	if (!entry)
+		return -EINVAL;
+
+	if (!entry->memdesc.ops ||
+		!entry->memdesc.ops->vmflags ||
+		!entry->memdesc.ops->vmfault) {
+		ret = -EINVAL;
+		goto err_put;
+	}
+
+	if (entry->memdesc.flags & KGSL_MEMFLAGS_SPARSE_PHYS) {
+		if (len != entry->memdesc.size) {
+			ret = -EINVAL;
+			goto err_put;
+		}
+	}
+
+	if (entry->memdesc.useraddr != 0) {
+		ret = -EBUSY;
+		goto err_put;
+	}
+
+	if (kgsl_memdesc_use_cpu_map(&entry->memdesc)) {
+		if (len != kgsl_memdesc_footprint(&entry->memdesc)) {
+			ret = -ERANGE;
+			goto err_put;
+		}
+	} else if (len != kgsl_memdesc_footprint(&entry->memdesc) &&
+		len != entry->memdesc.size) {
+		/*
+		 * If cpu_map != gpumap then user can map either the
+		 * footprint or the entry size
+		 */
+		ret = -ERANGE;
+		goto err_put;
+	}
+
+	*out_entry = entry;
+	return 0;
+err_put:
+	kgsl_mem_entry_put(entry);
+	return ret;
+}
+
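+/*
+ * _gpu_set_svm_region() - Map an entry at a specific SVM address
+ *
+ * Reserve @addr in the process pagetable, map the memdesc there and purge
+ * any stale memfree records covering the region.  Returns @addr on success
+ * or a negative error code on failure.
+ */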
+static unsigned long _gpu_set_svm_region(struct kgsl_process_private *private,
+		struct kgsl_mem_entry *entry, unsigned long addr,
+		unsigned long size)
+{
+	int ret;
+
+	ret = kgsl_mmu_set_svm_region(private->pagetable, (uint64_t) addr,
+		(uint64_t) size);
+
+	if (ret != 0)
+		return ret;
+
+	entry->memdesc.gpuaddr = (uint64_t) addr;
+	entry->memdesc.pagetable = private->pagetable;
+
+	ret = kgsl_mmu_map(private->pagetable, &entry->memdesc);
+	if (ret) {
+		kgsl_mmu_put_gpuaddr(&entry->memdesc);
+		return ret;
+	}
+
+	kgsl_memfree_purge(private->pagetable, entry->memdesc.gpuaddr,
+		entry->memdesc.size);
+
+	return addr;
+}
+
+static unsigned long _gpu_find_svm(struct kgsl_process_private *private,
+		unsigned long start, unsigned long end, unsigned long len,
+		unsigned int align)
+{
+	uint64_t addr = kgsl_mmu_find_svm_region(private->pagetable,
+		(uint64_t) start, (uint64_t)end, (uint64_t) len, align);
+
+	BUG_ON(!IS_ERR_VALUE((unsigned long)addr) && (addr > ULONG_MAX));
+
+	return (unsigned long) addr;
+}
+
+/* Search top down in the CPU VM region for a free address */
+static unsigned long _cpu_get_unmapped_area(unsigned long bottom,
+		unsigned long top, unsigned long len, unsigned long align)
+{
+	struct vm_unmapped_area_info info;
+	unsigned long addr, err;
+
+	info.flags = VM_UNMAPPED_AREA_TOPDOWN;
+	info.low_limit = bottom;
+	info.high_limit = top;
+	info.length = len;
+	info.align_offset = 0;
+	info.align_mask = align - 1;
+
+	addr = vm_unmapped_area(&info);
+
+	if (IS_ERR_VALUE(addr))
+		return addr;
+
+	err = security_mmap_addr(addr);
+	return err ? err : addr;
+}
+
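+/*
+ * _search_range() - Find an address that is free on both the CPU and GPU
+ *
+ * Walk downward from @end, alternating between the CPU VM (via
+ * _cpu_get_unmapped_area()) and the GPU SVM map (via _gpu_find_svm()) until
+ * an address is found that can be mapped on both sides or the range is
+ * exhausted.
+ */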
+static unsigned long _search_range(struct kgsl_process_private *private,
+		struct kgsl_mem_entry *entry,
+		unsigned long start, unsigned long end,
+		unsigned long len, uint64_t align)
+{
+	unsigned long cpu, gpu = end, result = -ENOMEM;
+
+	while (gpu > start) {
+		/* find a new empty spot on the CPU below the last one */
+		cpu = _cpu_get_unmapped_area(start, gpu, len,
+			(unsigned long) align);
+		if (IS_ERR_VALUE(cpu)) {
+			result = cpu;
+			break;
+		}
+		/* try to map it on the GPU */
+		result = _gpu_set_svm_region(private, entry, cpu, len);
+		if (!IS_ERR_VALUE(result))
+			break;
+
+		trace_kgsl_mem_unmapped_area_collision(entry, cpu, len);
+
+		if (cpu <= start) {
+			result = -ENOMEM;
+			break;
+		}
+
+		/* move downward to the next empty spot on the GPU */
+		gpu = _gpu_find_svm(private, start, cpu, len, align);
+		if (IS_ERR_VALUE(gpu)) {
+			result = gpu;
+			break;
+		}
+
+		/* Check that _gpu_find_svm doesn't put us in a loop */
+		if (gpu >= cpu) {
+			result = -ENOMEM;
+			break;
+		}
+
+		/* Break if the recommended GPU address is out of range */
+		if (gpu < start) {
+			result = -ENOMEM;
+			break;
+		}
+
+		/*
+		 * Add the length of the chunk to the GPU address to yield the
+		 * upper bound for the CPU search
+		 */
+		gpu += len;
+	}
+	return result;
+}
+
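+/*
+ * _get_svm_area() - Pick a shared CPU/GPU virtual address for an entry
+ *
+ * Derive the alignment from the memdesc, clamp the pagetable's SVM range to
+ * what the CPU allows, honor MAP_FIXED or a usable hint if one was given,
+ * and otherwise search downward from the top of the range.
+ */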
+static unsigned long _get_svm_area(struct kgsl_process_private *private,
+		struct kgsl_mem_entry *entry, unsigned long hint,
+		unsigned long len, unsigned long flags)
+{
+	uint64_t start, end;
+	int align_shift = kgsl_memdesc_get_align(&entry->memdesc);
+	uint64_t align;
+	unsigned long result;
+	unsigned long addr;
+
+	if (align_shift >= ilog2(SZ_2M))
+		align = SZ_2M;
+	else if (align_shift >= ilog2(SZ_1M))
+		align = SZ_1M;
+	else if (align_shift >= ilog2(SZ_64K))
+		align = SZ_64K;
+	else
+		align = SZ_4K;
+
+	/* get the GPU pagetable's SVM range */
+	if (kgsl_mmu_svm_range(private->pagetable, &start, &end,
+				entry->memdesc.flags))
+		return -ERANGE;
+
+	/* now clamp the range based on the CPU's requirements */
+	start = max_t(uint64_t, start, mmap_min_addr);
+	end = min_t(uint64_t, end, current->mm->mmap_base);
+	if (start >= end)
+		return -ERANGE;
+
+	if (flags & MAP_FIXED) {
+		/* we must use addr 'hint' or fail */
+		return _gpu_set_svm_region(private, entry, hint, len);
+	} else if (hint != 0) {
+		struct vm_area_struct *vma;
+
+		/*
+		 * See if the hint is usable, if not we will use
+		 * it as the start point for searching.
+		 */
+		addr = clamp_t(unsigned long, hint & ~(align - 1),
+				start, (end - len) & ~(align - 1));
+
+		vma = find_vma(current->mm, addr);
+
+		if (vma == NULL || ((addr + len) <= vma->vm_start)) {
+			result = _gpu_set_svm_region(private, entry, addr, len);
+
+			/* On failure drop down to keep searching */
+			if (!IS_ERR_VALUE(result))
+				return result;
+		}
+	} else {
+		/* no hint, start search at the top and work down */
+		addr = end & ~(align - 1);
+	}
+
+	/*
+	 * Search downwards from the hint first. If that fails we
+	 * must try to search above it.
+	 */
+	result = _search_range(private, entry, start, addr, len, align);
+	if (IS_ERR_VALUE(result) && hint != 0)
+		result = _search_range(private, entry, addr, end, len, align);
+
+	return result;
+}
+
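+/*
+ * kgsl_get_unmapped_area() - Address selection hook for mmap on the device
+ *
+ * The memstore falls back to the generic allocator.  Entries that use the
+ * CPU map go through the SVM search so the chosen address can also be
+ * mapped on the GPU; everything else only needs a free CPU address.
+ */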
+static unsigned long
+kgsl_get_unmapped_area(struct file *file, unsigned long addr,
+			unsigned long len, unsigned long pgoff,
+			unsigned long flags)
+{
+	unsigned long val;
+	unsigned long vma_offset = pgoff << PAGE_SHIFT;
+	struct kgsl_device_private *dev_priv = file->private_data;
+	struct kgsl_process_private *private = dev_priv->process_priv;
+	struct kgsl_device *device = dev_priv->device;
+	struct kgsl_mem_entry *entry = NULL;
+
+	if (vma_offset == (unsigned long) device->memstore.gpuaddr)
+		return get_unmapped_area(NULL, addr, len, pgoff, flags);
+
+	val = get_mmap_entry(private, &entry, pgoff, len);
+	if (val)
+		return val;
+
+	/* Do not allow CPU mappings for secure buffers */
+	if (kgsl_memdesc_is_secured(&entry->memdesc)) {
+		val = -EPERM;
+		goto put;
+	}
+
+	if (!kgsl_memdesc_use_cpu_map(&entry->memdesc)) {
+		val = get_unmapped_area(NULL, addr, len, 0, flags);
+		if (IS_ERR_VALUE(val))
+			KGSL_MEM_ERR(device,
+				"get_unmapped_area: pid %d addr %lx pgoff %lx len %ld failed error %d\n",
+				private->pid, addr, pgoff, len, (int) val);
+	} else {
+		val = _get_svm_area(private, entry, addr, len, flags);
+		if (IS_ERR_VALUE(val))
+			KGSL_MEM_ERR(device,
+				"_get_svm_area: pid %d addr %lx pgoff %lx len %ld failed error %d\n",
+				private->pid, addr, pgoff, len, (int) val);
+	}
+
+put:
+	kgsl_mem_entry_put(entry);
+	return val;
+}
+
+static int kgsl_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	unsigned int ret, cache;
+	unsigned long vma_offset = vma->vm_pgoff << PAGE_SHIFT;
+	struct kgsl_device_private *dev_priv = file->private_data;
+	struct kgsl_process_private *private = dev_priv->process_priv;
+	struct kgsl_mem_entry *entry = NULL;
+	struct kgsl_device *device = dev_priv->device;
+
+	/* Handle legacy behavior for memstore */
+
+	if (vma_offset == (unsigned long) device->memstore.gpuaddr)
+		return kgsl_mmap_memstore(device, vma);
+
+	/*
+	 * The reference count on the entry that we get from
+	 * get_mmap_entry() will be held until kgsl_gpumem_vm_close().
+	 */
+	ret = get_mmap_entry(private, &entry, vma->vm_pgoff,
+				vma->vm_end - vma->vm_start);
+	if (ret)
+		return ret;
+
+	vma->vm_flags |= entry->memdesc.ops->vmflags;
+
+	vma->vm_private_data = entry;
+
+	/* Determine user-side caching policy */
+
+	cache = kgsl_memdesc_get_cachemode(&entry->memdesc);
+
+	switch (cache) {
+	case KGSL_CACHEMODE_UNCACHED:
+		vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+		break;
+	case KGSL_CACHEMODE_WRITETHROUGH:
+		vma->vm_page_prot = pgprot_writethroughcache(vma->vm_page_prot);
+		if (pgprot_val(vma->vm_page_prot) ==
+			pgprot_val(pgprot_writebackcache(vma->vm_page_prot)))
+			WARN_ONCE(1, "WRITETHROUGH is deprecated for arm64");
+		break;
+	case KGSL_CACHEMODE_WRITEBACK:
+		vma->vm_page_prot = pgprot_writebackcache(vma->vm_page_prot);
+		break;
+	case KGSL_CACHEMODE_WRITECOMBINE:
+	default:
+		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
+		break;
+	}
+
+	vma->vm_ops = &kgsl_gpumem_vm_ops;
+
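+	/*
+	 * Cacheable mappings are populated up front with vm_insert_page()
+	 * rather than being demand-faulted through kgsl_gpumem_vm_fault().
+	 */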
+	if (cache == KGSL_CACHEMODE_WRITEBACK
+		|| cache == KGSL_CACHEMODE_WRITETHROUGH) {
+		int i;
+		unsigned long addr = vma->vm_start;
+		struct kgsl_memdesc *m = &entry->memdesc;
+
+		for (i = 0; i < m->page_count; i++) {
+			struct page *page = m->pages[i];
+
+			vm_insert_page(vma, addr, page);
+			addr += PAGE_SIZE;
+		}
+	}
+
+	vma->vm_file = file;
+
+	entry->memdesc.useraddr = vma->vm_start;
+
+	trace_kgsl_mem_mmap(entry);
+	return 0;
+}
+
+static irqreturn_t kgsl_irq_handler(int irq, void *data)
+{
+	struct kgsl_device *device = data;
+
+	return device->ftbl->irq_handler(device);
+
+}
+
+#define KGSL_READ_MESSAGE "OH HAI GPU\n"
+
+static ssize_t kgsl_read(struct file *filep, char __user *buf, size_t count,
+		loff_t *pos)
+{
+	return simple_read_from_buffer(buf, count, pos,
+			KGSL_READ_MESSAGE, strlen(KGSL_READ_MESSAGE) + 1);
+}
+
+static const struct file_operations kgsl_fops = {
+	.owner = THIS_MODULE,
+	.release = kgsl_release,
+	.open = kgsl_open,
+	.mmap = kgsl_mmap,
+	.read = kgsl_read,
+	.get_unmapped_area = kgsl_get_unmapped_area,
+	.unlocked_ioctl = kgsl_ioctl,
+	.compat_ioctl = kgsl_compat_ioctl,
+};
+
+struct kgsl_driver kgsl_driver  = {
+	.process_mutex = __MUTEX_INITIALIZER(kgsl_driver.process_mutex),
+	.ptlock = __SPIN_LOCK_UNLOCKED(kgsl_driver.ptlock),
+	.devlock = __MUTEX_INITIALIZER(kgsl_driver.devlock),
+	/*
+	 * Full cache flushes are faster than line by line on at least
+	 * 8064 and 8974 once the region to be flushed is > 16 MB.
+	 */
+	.full_cache_threshold = SZ_16M,
+
+	.stats.vmalloc = ATOMIC_LONG_INIT(0),
+	.stats.vmalloc_max = ATOMIC_LONG_INIT(0),
+	.stats.page_alloc = ATOMIC_LONG_INIT(0),
+	.stats.page_alloc_max = ATOMIC_LONG_INIT(0),
+	.stats.coherent = ATOMIC_LONG_INIT(0),
+	.stats.coherent_max = ATOMIC_LONG_INIT(0),
+	.stats.secure = ATOMIC_LONG_INIT(0),
+	.stats.secure_max = ATOMIC_LONG_INIT(0),
+	.stats.mapped = ATOMIC_LONG_INIT(0),
+	.stats.mapped_max = ATOMIC_LONG_INIT(0),
+};
+EXPORT_SYMBOL(kgsl_driver);
+
+static void _unregister_device(struct kgsl_device *device)
+{
+	int minor;
+
+	mutex_lock(&kgsl_driver.devlock);
+	for (minor = 0; minor < KGSL_DEVICE_MAX; minor++) {
+		if (device == kgsl_driver.devp[minor])
+			break;
+	}
+	if (minor != KGSL_DEVICE_MAX) {
+		device_destroy(kgsl_driver.class,
+				MKDEV(MAJOR(kgsl_driver.major), minor));
+		kgsl_driver.devp[minor] = NULL;
+	}
+	mutex_unlock(&kgsl_driver.devlock);
+}
+
+static int _register_device(struct kgsl_device *device)
+{
+	int minor, ret;
+	dev_t dev;
+
+	/* Find a minor for the device */
+
+	mutex_lock(&kgsl_driver.devlock);
+	for (minor = 0; minor < KGSL_DEVICE_MAX; minor++) {
+		if (kgsl_driver.devp[minor] == NULL) {
+			kgsl_driver.devp[minor] = device;
+			break;
+		}
+	}
+	mutex_unlock(&kgsl_driver.devlock);
+
+	if (minor == KGSL_DEVICE_MAX) {
+		KGSL_CORE_ERR("minor devices exhausted\n");
+		return -ENODEV;
+	}
+
+	/* Create the device */
+	dev = MKDEV(MAJOR(kgsl_driver.major), minor);
+	device->dev = device_create(kgsl_driver.class,
+				    &device->pdev->dev,
+				    dev, device,
+				    device->name);
+
+	if (IS_ERR(device->dev)) {
+		mutex_lock(&kgsl_driver.devlock);
+		kgsl_driver.devp[minor] = NULL;
+		mutex_unlock(&kgsl_driver.devlock);
+		ret = PTR_ERR(device->dev);
+		KGSL_CORE_ERR("device_create(%s): %d\n", device->name, ret);
+		return ret;
+	}
+
+	dev_set_drvdata(&device->pdev->dev, device);
+	return 0;
+}
+
+int kgsl_device_platform_probe(struct kgsl_device *device)
+{
+	int status = -EINVAL;
+	struct resource *res;
+	int cpu;
+
+	status = _register_device(device);
+	if (status)
+		return status;
+
+	/* Initialize logging first, so that failures below actually print. */
+	kgsl_device_debugfs_init(device);
+
+	status = kgsl_pwrctrl_init(device);
+	if (status)
+		goto error;
+
+	/* Get starting physical address of device registers */
+	res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM,
+					   device->iomemname);
+	if (res == NULL) {
+		KGSL_DRV_ERR(device, "platform_get_resource_byname failed\n");
+		status = -EINVAL;
+		goto error_pwrctrl_close;
+	}
+	if (res->start == 0 || resource_size(res) == 0) {
+		KGSL_DRV_ERR(device, "dev %d invalid register region\n",
+			device->id);
+		status = -EINVAL;
+		goto error_pwrctrl_close;
+	}
+
+	device->reg_phys = res->start;
+	device->reg_len = resource_size(res);
+
+	/*
+	 * Check if a shadermemname is defined, and then get shader memory
+	 * details including shader memory starting physical address
+	 * and shader memory length
+	 */
+	if (device->shadermemname != NULL) {
+		res = platform_get_resource_byname(device->pdev, IORESOURCE_MEM,
+						device->shadermemname);
+
+		if (res == NULL) {
+			KGSL_DRV_WARN(device,
+			"Shader memory: platform_get_resource_byname failed\n");
+		} else {
+			device->shader_mem_phys = res->start;
+			device->shader_mem_len = resource_size(res);
+		}
+
+		if (!devm_request_mem_region(device->dev,
+					device->shader_mem_phys,
+					device->shader_mem_len,
+						device->name)) {
+			KGSL_DRV_WARN(device, "request_mem_region_failed\n");
+		}
+	}
+
+	if (!devm_request_mem_region(device->dev, device->reg_phys,
+				device->reg_len, device->name)) {
+		KGSL_DRV_ERR(device, "request_mem_region failed\n");
+		status = -ENODEV;
+		goto error_pwrctrl_close;
+	}
+
+	device->reg_virt = devm_ioremap(device->dev, device->reg_phys,
+					device->reg_len);
+
+	if (device->reg_virt == NULL) {
+		KGSL_DRV_ERR(device, "ioremap failed\n");
+		status = -ENODEV;
+		goto error_pwrctrl_close;
+	}
+	/* Acquire the interrupt */
+	device->pwrctrl.interrupt_num =
+		platform_get_irq_byname(device->pdev, device->pwrctrl.irq_name);
+
+	if (device->pwrctrl.interrupt_num <= 0) {
+		KGSL_DRV_ERR(device, "platform_get_irq_byname failed: %d\n",
+					 device->pwrctrl.interrupt_num);
+		status = -EINVAL;
+		goto error_pwrctrl_close;
+	}
+
+	status = devm_request_irq(device->dev, device->pwrctrl.interrupt_num,
+				  kgsl_irq_handler, IRQF_TRIGGER_HIGH,
+				  device->name, device);
+	if (status) {
+		KGSL_DRV_ERR(device, "request_irq(%d) failed: %d\n",
+			      device->pwrctrl.interrupt_num, status);
+		goto error_pwrctrl_close;
+	}
+	disable_irq(device->pwrctrl.interrupt_num);
+
+	KGSL_DRV_INFO(device,
+		"dev_id %d regs phys 0x%08lx size 0x%08x\n",
+		device->id, device->reg_phys, device->reg_len);
+
+	rwlock_init(&device->context_lock);
+
+	setup_timer(&device->idle_timer, kgsl_timer, (unsigned long) device);
+
+	status = kgsl_mmu_probe(device, kgsl_mmu_type);
+	if (status != 0)
+		goto error_pwrctrl_close;
+
+	/* Check to see if our device can perform DMA correctly */
+	status = dma_set_coherent_mask(&device->pdev->dev, KGSL_DMA_BIT_MASK);
+	if (status)
+		goto error_close_mmu;
+
+	/* Initialize the memory pools */
+	kgsl_init_page_pools(device->pdev);
+
+	status = kgsl_allocate_global(device, &device->memstore,
+		KGSL_MEMSTORE_SIZE, 0, KGSL_MEMDESC_CONTIG, "memstore");
+
+	if (status != 0)
+		goto error_close_mmu;
+
+	status = kgsl_allocate_global(device, &device->scratch,
+		PAGE_SIZE, 0, 0, "scratch");
+	if (status != 0)
+		goto error_free_memstore;
+
+	/*
+	 * The default request type, PM_QOS_REQ_ALL_CORES, applies the vote
+	 * to every online CPU core and so has a growing power impact as the
+	 * number of CPUs increases.  PM_QOS_REQ_AFFINE_IRQ applies the vote
+	 * only to the CPU that the GPU IRQ is affine to.
+	 */
+#ifdef CONFIG_SMP
+
+	device->pwrctrl.pm_qos_req_dma.type = PM_QOS_REQ_AFFINE_IRQ;
+	device->pwrctrl.pm_qos_req_dma.irq = device->pwrctrl.interrupt_num;
+
+#endif
+	pm_qos_add_request(&device->pwrctrl.pm_qos_req_dma,
+				PM_QOS_CPU_DMA_LATENCY,
+				PM_QOS_DEFAULT_VALUE);
+
+	if (device->pwrctrl.l2pc_cpus_mask) {
+
+		device->pwrctrl.l2pc_cpus_qos.type =
+				PM_QOS_REQ_AFFINE_CORES;
+		cpumask_clear(&device->pwrctrl.l2pc_cpus_qos.cpus_affine);
+		for_each_possible_cpu(cpu) {
+			if ((1 << cpu) & device->pwrctrl.l2pc_cpus_mask)
+				cpumask_set_cpu(cpu, &device->pwrctrl.
+						l2pc_cpus_qos.cpus_affine);
+		}
+
+		pm_qos_add_request(&device->pwrctrl.l2pc_cpus_qos,
+				PM_QOS_CPU_DMA_LATENCY,
+				PM_QOS_DEFAULT_VALUE);
+	}
+
+	device->events_wq = alloc_workqueue("kgsl-events",
+		WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
+
+	/* Initialize the snapshot engine */
+	kgsl_device_snapshot_init(device);
+
+	/* Initialize common sysfs entries */
+	kgsl_pwrctrl_init_sysfs(device);
+
+	return 0;
+
+error_free_memstore:
+	kgsl_free_global(device, &device->memstore);
+error_close_mmu:
+	kgsl_mmu_close(device);
+error_pwrctrl_close:
+	kgsl_pwrctrl_close(device);
+error:
+	_unregister_device(device);
+	return status;
+}
+EXPORT_SYMBOL(kgsl_device_platform_probe);
+
+void kgsl_device_platform_remove(struct kgsl_device *device)
+{
+	destroy_workqueue(device->events_wq);
+
+	kgsl_device_snapshot_close(device);
+
+	kgsl_exit_page_pools();
+
+	kgsl_pwrctrl_uninit_sysfs(device);
+
+	pm_qos_remove_request(&device->pwrctrl.pm_qos_req_dma);
+	if (device->pwrctrl.l2pc_cpus_mask)
+		pm_qos_remove_request(&device->pwrctrl.l2pc_cpus_qos);
+
+	idr_destroy(&device->context_idr);
+
+	kgsl_free_global(device, &device->scratch);
+
+	kgsl_free_global(device, &device->memstore);
+
+	kgsl_mmu_close(device);
+
+	kgsl_pwrctrl_close(device);
+
+	_unregister_device(device);
+}
+EXPORT_SYMBOL(kgsl_device_platform_remove);
+
+static void kgsl_core_exit(void)
+{
+	kgsl_events_exit();
+	kgsl_core_debugfs_close();
+
+	/*
+	 * We call kgsl_sharedmem_uninit_sysfs() and device_unregister()
+	 * only if kgsl_driver.virtdev has been populated.
+	 * We check at least one member of kgsl_driver.virtdev to
+	 * see if it is not NULL (and thus, has been populated).
+	 */
+	if (kgsl_driver.virtdev.class) {
+		kgsl_sharedmem_uninit_sysfs();
+		device_unregister(&kgsl_driver.virtdev);
+	}
+
+	if (kgsl_driver.class) {
+		class_destroy(kgsl_driver.class);
+		kgsl_driver.class = NULL;
+	}
+
+	kgsl_drawobj_exit();
+
+	kgsl_memfree_exit();
+	unregister_chrdev_region(kgsl_driver.major, KGSL_DEVICE_MAX);
+}
+
+static int __init kgsl_core_init(void)
+{
+	int result = 0;
+	/* alloc major and minor device numbers */
+	result = alloc_chrdev_region(&kgsl_driver.major, 0, KGSL_DEVICE_MAX,
+		"kgsl");
+
+	if (result < 0) {
+
+		KGSL_CORE_ERR("alloc_chrdev_region failed err = %d\n", result);
+		goto err;
+	}
+
+	cdev_init(&kgsl_driver.cdev, &kgsl_fops);
+	kgsl_driver.cdev.owner = THIS_MODULE;
+	kgsl_driver.cdev.ops = &kgsl_fops;
+	result = cdev_add(&kgsl_driver.cdev, MKDEV(MAJOR(kgsl_driver.major), 0),
+		       KGSL_DEVICE_MAX);
+
+	if (result) {
+		KGSL_CORE_ERR("kgsl: cdev_add() failed, dev_num= %d, result= %d\n",
+			kgsl_driver.major, result);
+		goto err;
+	}
+
+	kgsl_driver.class = class_create(THIS_MODULE, "kgsl");
+
+	if (IS_ERR(kgsl_driver.class)) {
+		result = PTR_ERR(kgsl_driver.class);
+		KGSL_CORE_ERR("failed to create class for kgsl\n");
+		goto err;
+	}
+
+	/*
+	 * Make a virtual device for managing core related things
+	 * in sysfs
+	 */
+	kgsl_driver.virtdev.class = kgsl_driver.class;
+	dev_set_name(&kgsl_driver.virtdev, "kgsl");
+	result = device_register(&kgsl_driver.virtdev);
+	if (result) {
+		KGSL_CORE_ERR("device_register failed\n");
+		goto err;
+	}
+
+	/* Make kobjects in the virtual device for storing statistics */
+
+	kgsl_driver.ptkobj =
+	  kobject_create_and_add("pagetables",
+				 &kgsl_driver.virtdev.kobj);
+
+	kgsl_driver.prockobj =
+		kobject_create_and_add("proc",
+				       &kgsl_driver.virtdev.kobj);
+
+	kgsl_core_debugfs_init();
+
+	kgsl_sharedmem_init_sysfs();
+
+	INIT_LIST_HEAD(&kgsl_driver.process_list);
+
+	INIT_LIST_HEAD(&kgsl_driver.pagetable_list);
+
+	kgsl_driver.workqueue = alloc_workqueue("kgsl-workqueue",
+		WQ_UNBOUND | WQ_MEM_RECLAIM | WQ_SYSFS, 0);
+
+	kgsl_driver.mem_workqueue = alloc_workqueue("kgsl-mementry",
+		WQ_UNBOUND | WQ_MEM_RECLAIM, 0);
+
+	kgsl_events_init();
+
+	result = kgsl_drawobj_init();
+	if (result)
+		goto err;
+
+	kgsl_memfree_init();
+
+	return 0;
+
+err:
+	kgsl_core_exit();
+	return result;
+}
+
+module_init(kgsl_core_init);
+module_exit(kgsl_core_exit);
+
+MODULE_DESCRIPTION("MSM GPU driver");
+MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/msm/kgsl.h b/drivers/gpu/msm/kgsl.h
new file mode 100644
index 0000000..c60a071
--- /dev/null
+++ b/drivers/gpu/msm/kgsl.h
@@ -0,0 +1,612 @@
+/* Copyright (c) 2008-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef __KGSL_H
+#define __KGSL_H
+
+#include <linux/types.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
+#include <linux/msm_kgsl.h>
+#include <linux/platform_device.h>
+#include <linux/clk.h>
+#include <linux/interrupt.h>
+#include <linux/mutex.h>
+#include <linux/cdev.h>
+#include <linux/regulator/consumer.h>
+#include <linux/mm.h>
+#include <linux/uaccess.h>
+#include <asm/cacheflush.h>
+
+/*
+ * --- kgsl drawobj flags ---
+ * These flags are the same as the --- cmdbatch flags ---
+ * but renamed to reflect that cmdbatch has been renamed to drawobj.
+ */
+#define KGSL_DRAWOBJ_MEMLIST           KGSL_CMDBATCH_MEMLIST
+#define KGSL_DRAWOBJ_MARKER            KGSL_CMDBATCH_MARKER
+#define KGSL_DRAWOBJ_SUBMIT_IB_LIST    KGSL_CMDBATCH_SUBMIT_IB_LIST
+#define KGSL_DRAWOBJ_CTX_SWITCH        KGSL_CMDBATCH_CTX_SWITCH
+#define KGSL_DRAWOBJ_PROFILING         KGSL_CMDBATCH_PROFILING
+#define KGSL_DRAWOBJ_PROFILING_KTIME   KGSL_CMDBATCH_PROFILING_KTIME
+#define KGSL_DRAWOBJ_END_OF_FRAME      KGSL_CMDBATCH_END_OF_FRAME
+#define KGSL_DRAWOBJ_SYNC              KGSL_CMDBATCH_SYNC
+#define KGSL_DRAWOBJ_PWR_CONSTRAINT    KGSL_CMDBATCH_PWR_CONSTRAINT
+#define KGSL_DRAWOBJ_SPARSE            KGSL_CMDBATCH_SPARSE
+
+#define kgsl_drawobj_profiling_buffer kgsl_cmdbatch_profiling_buffer
+
+
+/*
+ * The number of memstore arrays limits the number of contexts allowed.
+ * If more contexts are needed, increase the multiplier in KGSL_MEMSTORE_SIZE.
+ */
+#define KGSL_MEMSTORE_SIZE	((int)(PAGE_SIZE * 2))
+#define KGSL_MEMSTORE_GLOBAL	(0)
+#define KGSL_PRIORITY_MAX_RB_LEVELS 4
+#define KGSL_MEMSTORE_MAX	(KGSL_MEMSTORE_SIZE / \
+	sizeof(struct kgsl_devmemstore) - 1 - KGSL_PRIORITY_MAX_RB_LEVELS)
+
+#define MEMSTORE_RB_OFFSET(rb, field)	\
+	KGSL_MEMSTORE_OFFSET(((rb)->id + KGSL_MEMSTORE_MAX), field)
+
+#define MEMSTORE_ID_GPU_ADDR(dev, iter, field) \
+	((dev)->memstore.gpuaddr + KGSL_MEMSTORE_OFFSET(iter, field))
+
+#define MEMSTORE_RB_GPU_ADDR(dev, rb, field)	\
+	((dev)->memstore.gpuaddr + \
+	 KGSL_MEMSTORE_OFFSET(((rb)->id + KGSL_MEMSTORE_MAX), field))
+
+/*
+ * SCRATCH MEMORY: The scratch memory is one page worth of data that
+ * is mapped into the GPU. This allows for some 'shared' data between
+ * the GPU and CPU. For example, it will be used by the GPU to write
+ * each updated RPTR for each RB.
+ *
+ * Used Data:
+ * Offset: Length(bytes): What
+ * 0x0: 4 * KGSL_PRIORITY_MAX_RB_LEVELS: RB0 RPTR
+ */
+
+/* Shadow global helpers */
+#define SCRATCH_RPTR_OFFSET(id) ((id) * sizeof(unsigned int))
+#define SCRATCH_RPTR_GPU_ADDR(dev, id) \
+	((dev)->scratch.gpuaddr + SCRATCH_RPTR_OFFSET(id))
+
+/* Timestamp window used to detect rollovers (half of integer range) */
+#define KGSL_TIMESTAMP_WINDOW 0x80000000
+
+/*
+ * A helper for memory statistics - add the new size to the stat and, if
+ * the statistic is greater than the current *max, update *max.
+ */
+static inline void KGSL_STATS_ADD(uint64_t size, atomic_long_t *stat,
+		atomic_long_t *max)
+{
+	uint64_t ret = atomic_long_add_return(size, stat);
+
+	if (ret > atomic_long_read(max))
+		atomic_long_set(max, ret);
+}
+
+#define KGSL_MAX_NUMIBS 100000
+#define KGSL_MAX_SYNCPOINTS 32
+
+struct kgsl_device;
+struct kgsl_context;
+
+/**
+ * struct kgsl_driver - main container for global KGSL things
+ * @cdev: Character device struct
+ * @major: Major ID for the KGSL device
+ * @class: Pointer to the class struct for the core KGSL sysfs entries
+ * @virtdev: Virtual device for managing the core
+ * @ptkobj: kobject for storing the pagetable statistics
+ * @prockobj: kobject for storing the process statistics
+ * @devp: Array of pointers to the individual KGSL device structs
+ * @process_list: List of open processes
+ * @pagetable_list: List of open pagetables
+ * @ptlock: Lock for accessing the pagetable list
+ * @process_mutex: Mutex for accessing the process list
+ * @devlock: Mutex protecting the device list
+ * @stats: Struct containing atomic memory statistics
+ * @full_cache_threshold: the threshold that triggers a full cache flush
+ * @workqueue: Pointer to a single threaded workqueue
+ * @mem_workqueue: Pointer to a workqueue for deferring memory entries
+ */
+struct kgsl_driver {
+	struct cdev cdev;
+	dev_t major;
+	struct class *class;
+	struct device virtdev;
+	struct kobject *ptkobj;
+	struct kobject *prockobj;
+	struct kgsl_device *devp[KGSL_DEVICE_MAX];
+	struct list_head process_list;
+	struct list_head pagetable_list;
+	spinlock_t ptlock;
+	struct mutex process_mutex;
+	struct mutex devlock;
+	struct {
+		atomic_long_t vmalloc;
+		atomic_long_t vmalloc_max;
+		atomic_long_t page_alloc;
+		atomic_long_t page_alloc_max;
+		atomic_long_t coherent;
+		atomic_long_t coherent_max;
+		atomic_long_t secure;
+		atomic_long_t secure_max;
+		atomic_long_t mapped;
+		atomic_long_t mapped_max;
+	} stats;
+	unsigned int full_cache_threshold;
+	struct workqueue_struct *workqueue;
+	struct workqueue_struct *mem_workqueue;
+};
+
+extern struct kgsl_driver kgsl_driver;
+extern struct mutex kgsl_mmu_sync;
+
+struct kgsl_pagetable;
+struct kgsl_memdesc;
+
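+/**
+ * struct kgsl_memdesc_ops - Hooks implemented per memdesc memory type
+ * @vmflags: Extra VM_* flags ORed into a userspace vma at mmap time
+ * @vmfault: Fault handler used to populate userspace mappings on demand
+ * @free: Release the backing memory for the memdesc
+ * @map_kernel: Map the memdesc into the kernel so hostptr is valid
+ * @unmap_kernel: Undo a previous map_kernel()
+ */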
+struct kgsl_memdesc_ops {
+	unsigned int vmflags;
+	int (*vmfault)(struct kgsl_memdesc *, struct vm_area_struct *,
+		       struct vm_fault *);
+	void (*free)(struct kgsl_memdesc *memdesc);
+	int (*map_kernel)(struct kgsl_memdesc *);
+	void (*unmap_kernel)(struct kgsl_memdesc *);
+};
+
+/* Internal definitions for memdesc->priv */
+#define KGSL_MEMDESC_GUARD_PAGE BIT(0)
+/* Set if the memdesc is mapped into all pagetables */
+#define KGSL_MEMDESC_GLOBAL BIT(1)
+/* The memdesc is frozen during a snapshot */
+#define KGSL_MEMDESC_FROZEN BIT(2)
+/* The memdesc is mapped into a pagetable */
+#define KGSL_MEMDESC_MAPPED BIT(3)
+/* The memdesc is secured for content protection */
+#define KGSL_MEMDESC_SECURE BIT(4)
+/* Memory is accessible in privileged mode */
+#define KGSL_MEMDESC_PRIVILEGED BIT(6)
+/* The memdesc is TZ locked content protection */
+#define KGSL_MEMDESC_TZ_LOCKED BIT(7)
+/* The memdesc is allocated through contiguous memory */
+#define KGSL_MEMDESC_CONTIG BIT(8)
+
+/**
+ * struct kgsl_memdesc - GPU memory object descriptor
+ * @pagetable: Pointer to the pagetable that the object is mapped in
+ * @hostptr: Kernel virtual address
+ * @hostptr_count: Number of threads using hostptr
+ * @useraddr: User virtual address (if applicable)
+ * @gpuaddr: GPU virtual address
+ * @physaddr: Physical address of the memory object
+ * @size: Size of the memory object
+ * @mapsize: Size of memory mapped in userspace
+ * @priv: Internal flags and settings
+ * @sgt: Scatter gather table for allocated pages
+ * @ops: Function hooks for the memdesc memory type
+ * @flags: Flags set from userspace
+ * @dev: Pointer to the struct device that owns this memory
+ * @attrs: dma attributes for this memory
+ * @pages: An array of pointers to allocated pages
+ * @page_count: Total number of pages allocated
+ * @cur_bindings: Number of sparse pages actively bound
+ */
+struct kgsl_memdesc {
+	struct kgsl_pagetable *pagetable;
+	void *hostptr;
+	unsigned int hostptr_count;
+	unsigned long useraddr;
+	uint64_t gpuaddr;
+	phys_addr_t physaddr;
+	uint64_t size;
+	uint64_t mapsize;
+	unsigned int priv;
+	struct sg_table *sgt;
+	struct kgsl_memdesc_ops *ops;
+	uint64_t flags;
+	struct device *dev;
+	unsigned long attrs;
+	struct page **pages;
+	unsigned int page_count;
+	unsigned int cur_bindings;
+};
+
+/*
+ * List of different memory entry types. The usermem enum
+ * starts at 0, which we use for allocated memory, so 1 is
+ * added to the enum values.
+ */
+#define KGSL_MEM_ENTRY_KERNEL 0
+#define KGSL_MEM_ENTRY_USER (KGSL_USER_MEM_TYPE_ADDR + 1)
+#define KGSL_MEM_ENTRY_ION (KGSL_USER_MEM_TYPE_ION + 1)
+#define KGSL_MEM_ENTRY_MAX (KGSL_USER_MEM_TYPE_MAX + 1)
+
+/* symbolic table for trace and debugfs */
+#define KGSL_MEM_TYPES \
+	{ KGSL_MEM_ENTRY_KERNEL, "gpumem" }, \
+	{ KGSL_MEM_ENTRY_USER, "usermem" }, \
+	{ KGSL_MEM_ENTRY_ION, "ion" }
+
+/*
+ * struct kgsl_mem_entry - a userspace memory allocation
+ * @refcount: reference count. Currently userspace can only
+ *  hold a single reference count, but the kernel may hold more.
+ * @memdesc: description of the memory
+ * @priv_data: type-specific data, such as the dma-buf attachment pointer.
+ * @node: rb_node for the gpu address lookup rb tree
+ * @id: idr index for this entry, can be used to find memory that does not have
+ *  a valid GPU address.
+ * @priv: back pointer to the process that owns this memory
+ * @pending_free: if !0, userspace requested that this memory be freed, but there
+ *  are still references to it.
+ * @dev_priv: back pointer to the device file that created this entry.
+ * @metadata: String containing user specified metadata for the entry
+ * @work: Work struct used to schedule a kgsl_mem_entry_put in atomic contexts
+ * @bind_lock: Lock for sparse memory bindings
+ * @bind_tree: RB Tree for sparse memory bindings
+ */
+struct kgsl_mem_entry {
+	struct kref refcount;
+	struct kgsl_memdesc memdesc;
+	void *priv_data;
+	struct rb_node node;
+	unsigned int id;
+	struct kgsl_process_private *priv;
+	int pending_free;
+	char metadata[KGSL_GPUOBJ_ALLOC_METADATA_MAX + 1];
+	struct work_struct work;
+	spinlock_t bind_lock;
+	struct rb_root bind_tree;
+};
+
+struct kgsl_device_private;
+struct kgsl_event_group;
+
+typedef void (*kgsl_event_func)(struct kgsl_device *, struct kgsl_event_group *,
+		void *, int);
+
+/**
+ * struct kgsl_event - KGSL GPU timestamp event
+ * @device: Pointer to the KGSL device that owns the event
+ * @context: Pointer to the context that owns the event
+ * @timestamp: Timestamp for the event to expire
+ * @func: Callback function for the event when it expires
+ * @priv: Private data passed to the callback function
+ * @node: List node for the kgsl_event_group list
+ * @created: Jiffies when the event was created
+ * @work: Work struct for dispatching the callback
+ * @result: KGSL event result type to pass to the callback
+ * @group: The event group this event belongs to
+ */
+struct kgsl_event {
+	struct kgsl_device *device;
+	struct kgsl_context *context;
+	unsigned int timestamp;
+	kgsl_event_func func;
+	void *priv;
+	struct list_head node;
+	unsigned int created;
+	struct work_struct work;
+	int result;
+	struct kgsl_event_group *group;
+};
+
+typedef int (*readtimestamp_func)(struct kgsl_device *, void *,
+	enum kgsl_timestamp_type, unsigned int *);
+
+/**
+ * struct kgsl_event_group - A list of GPU events
+ * @context: Pointer to the active context for the events
+ * @lock: Spinlock for protecting the list
+ * @events: List of active GPU events
+ * @group: Node for the master group list
+ * @processed: Last processed timestamp
+ * @name: String name for the group (for the debugfs file)
+ * @readtimestamp: Function pointer to read a timestamp
+ * @priv: Priv member to pass to the readtimestamp function
+ */
+struct kgsl_event_group {
+	struct kgsl_context *context;
+	spinlock_t lock;
+	struct list_head events;
+	struct list_head group;
+	unsigned int processed;
+	char name[64];
+	readtimestamp_func readtimestamp;
+	void *priv;
+};
+
+/**
+ * struct kgsl_protected_registers - Protected register range
+ * @base: Offset of the range to be protected
+ * @range: Range (# of registers = 2 ** range)
+ */
+struct kgsl_protected_registers {
+	unsigned int base;
+	int range;
+};
+
+/**
+ * struct sparse_bind_object - Bind metadata
+ * @node: Node for the rb tree
+ * @p_memdesc: Physical memdesc bound to
+ * @v_off: Offset of bind in the virtual entry
+ * @p_off: Offset of bind in the physical memdesc
+ * @size: Size of the bind
+ * @flags: Flags for the bind
+ */
+struct sparse_bind_object {
+	struct rb_node node;
+	struct kgsl_memdesc *p_memdesc;
+	uint64_t v_off;
+	uint64_t p_off;
+	uint64_t size;
+	uint64_t flags;
+};
+
+long kgsl_ioctl_device_getproperty(struct kgsl_device_private *dev_priv,
+					  unsigned int cmd, void *data);
+long kgsl_ioctl_device_setproperty(struct kgsl_device_private *dev_priv,
+					unsigned int cmd, void *data);
+long kgsl_ioctl_device_waittimestamp_ctxtid(struct kgsl_device_private
+				*dev_priv, unsigned int cmd, void *data);
+long kgsl_ioctl_rb_issueibcmds(struct kgsl_device_private *dev_priv,
+				      unsigned int cmd, void *data);
+long kgsl_ioctl_submit_commands(struct kgsl_device_private *dev_priv,
+				unsigned int cmd, void *data);
+long kgsl_ioctl_cmdstream_readtimestamp_ctxtid(struct kgsl_device_private
+					*dev_priv, unsigned int cmd,
+					void *data);
+long kgsl_ioctl_cmdstream_freememontimestamp_ctxtid(
+						struct kgsl_device_private
+						*dev_priv, unsigned int cmd,
+						void *data);
+long kgsl_ioctl_drawctxt_create(struct kgsl_device_private *dev_priv,
+					unsigned int cmd, void *data);
+long kgsl_ioctl_drawctxt_destroy(struct kgsl_device_private *dev_priv,
+					unsigned int cmd, void *data);
+long kgsl_ioctl_sharedmem_free(struct kgsl_device_private *dev_priv,
+					unsigned int cmd, void *data);
+long kgsl_ioctl_gpumem_free_id(struct kgsl_device_private *dev_priv,
+					unsigned int cmd, void *data);
+long kgsl_ioctl_map_user_mem(struct kgsl_device_private *dev_priv,
+					unsigned int cmd, void *data);
+long kgsl_ioctl_gpumem_sync_cache(struct kgsl_device_private *dev_priv,
+					unsigned int cmd, void *data);
+long kgsl_ioctl_gpumem_sync_cache_bulk(struct kgsl_device_private *dev_priv,
+					unsigned int cmd, void *data);
+long kgsl_ioctl_sharedmem_flush_cache(struct kgsl_device_private *dev_priv,
+					unsigned int cmd, void *data);
+long kgsl_ioctl_gpumem_alloc(struct kgsl_device_private *dev_priv,
+					unsigned int cmd, void *data);
+long kgsl_ioctl_gpumem_alloc_id(struct kgsl_device_private *dev_priv,
+					unsigned int cmd, void *data);
+long kgsl_ioctl_gpumem_get_info(struct kgsl_device_private *dev_priv,
+					unsigned int cmd, void *data);
+long kgsl_ioctl_timestamp_event(struct kgsl_device_private *dev_priv,
+					unsigned int cmd, void *data);
+long kgsl_ioctl_gpuobj_alloc(struct kgsl_device_private *dev_priv,
+					unsigned int cmd, void *data);
+long kgsl_ioctl_gpuobj_free(struct kgsl_device_private *dev_priv,
+					unsigned int cmd, void *data);
+long kgsl_ioctl_gpuobj_info(struct kgsl_device_private *dev_priv,
+					unsigned int cmd, void *data);
+long kgsl_ioctl_gpuobj_import(struct kgsl_device_private *dev_priv,
+					unsigned int cmd, void *data);
+long kgsl_ioctl_gpuobj_sync(struct kgsl_device_private *dev_priv,
+					unsigned int cmd, void *data);
+long kgsl_ioctl_gpu_command(struct kgsl_device_private *dev_priv,
+				unsigned int cmd, void *data);
+long kgsl_ioctl_gpuobj_set_info(struct kgsl_device_private *dev_priv,
+				unsigned int cmd, void *data);
+
+long kgsl_ioctl_sparse_phys_alloc(struct kgsl_device_private *dev_priv,
+					unsigned int cmd, void *data);
+long kgsl_ioctl_sparse_phys_free(struct kgsl_device_private *dev_priv,
+					unsigned int cmd, void *data);
+long kgsl_ioctl_sparse_virt_alloc(struct kgsl_device_private *dev_priv,
+					unsigned int cmd, void *data);
+long kgsl_ioctl_sparse_virt_free(struct kgsl_device_private *dev_priv,
+					unsigned int cmd, void *data);
+long kgsl_ioctl_sparse_bind(struct kgsl_device_private *dev_priv,
+					unsigned int cmd, void *data);
+long kgsl_ioctl_sparse_unbind(struct kgsl_device_private *dev_priv,
+					unsigned int cmd, void *data);
+
+void kgsl_mem_entry_destroy(struct kref *kref);
+
+void kgsl_get_egl_counts(struct kgsl_mem_entry *entry,
+			int *egl_surface_count, int *egl_image_count);
+
+struct kgsl_mem_entry * __must_check
+kgsl_sharedmem_find(struct kgsl_process_private *private, uint64_t gpuaddr);
+
+struct kgsl_mem_entry * __must_check
+kgsl_sharedmem_find_id(struct kgsl_process_private *process, unsigned int id);
+
+extern const struct dev_pm_ops kgsl_pm_ops;
+
+int kgsl_suspend_driver(struct platform_device *pdev, pm_message_t state);
+int kgsl_resume_driver(struct platform_device *pdev);
+
+static inline int kgsl_gpuaddr_in_memdesc(const struct kgsl_memdesc *memdesc,
+				uint64_t gpuaddr, uint64_t size)
+{
+	/* set a minimum size to search for */
+	if (!size)
+		size = 1;
+
+	/* don't overflow */
+	if (size > U64_MAX - gpuaddr)
+		return 0;
+
+	if (gpuaddr >= memdesc->gpuaddr &&
+	    ((gpuaddr + size) <= (memdesc->gpuaddr + memdesc->size))) {
+		return 1;
+	}
+	return 0;
+}
+
+static inline void *kgsl_memdesc_map(struct kgsl_memdesc *memdesc)
+{
+	if (memdesc->ops && memdesc->ops->map_kernel)
+		memdesc->ops->map_kernel(memdesc);
+
+	return memdesc->hostptr;
+}
+
+static inline void kgsl_memdesc_unmap(struct kgsl_memdesc *memdesc)
+{
+	if (memdesc->ops && memdesc->ops->unmap_kernel)
+		memdesc->ops->unmap_kernel(memdesc);
+}
+
+static inline void *kgsl_gpuaddr_to_vaddr(struct kgsl_memdesc *memdesc,
+					     uint64_t gpuaddr)
+{
+	void *hostptr = NULL;
+
+	if ((gpuaddr >= memdesc->gpuaddr) &&
+		(gpuaddr < (memdesc->gpuaddr + memdesc->size)))
+		hostptr = kgsl_memdesc_map(memdesc);
+
+	return hostptr != NULL ? hostptr + (gpuaddr - memdesc->gpuaddr) : NULL;
+}
+
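+/*
+ * timestamp_cmp() - Compare two timestamps, accounting for rollover
+ *
+ * Returns 0 if @a == @b, 1 if @a comes after @b and -1 otherwise, treating
+ * values more than KGSL_TIMESTAMP_WINDOW apart as a rollover.  For example,
+ * timestamp_cmp(0x00000002, 0xfffffffe) returns 1 because 0x00000002 is
+ * assumed to have been issued after the counter wrapped.
+ */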
+static inline int timestamp_cmp(unsigned int a, unsigned int b)
+{
+	/* check for equal */
+	if (a == b)
+		return 0;
+
+	/* check for greater-than for non-rollover case */
+	if ((a > b) && (a - b < KGSL_TIMESTAMP_WINDOW))
+		return 1;
+
+	/* check for greater-than for rollover case
+	 * note that <= is required to ensure that consistent
+	 * results are returned for values whose difference is
+	 * equal to the window size
+	 */
+	a += KGSL_TIMESTAMP_WINDOW;
+	b += KGSL_TIMESTAMP_WINDOW;
+	return ((a > b) && (a - b <= KGSL_TIMESTAMP_WINDOW)) ? 1 : -1;
+}
+
+/**
+ * kgsl_schedule_work() - Schedule a work item on the KGSL workqueue
+ * @work: work item to schedule
+ */
+static inline void kgsl_schedule_work(struct work_struct *work)
+{
+	queue_work(kgsl_driver.workqueue, work);
+}
+
+static inline int
+kgsl_mem_entry_get(struct kgsl_mem_entry *entry)
+{
+	if (entry)
+		return kref_get_unless_zero(&entry->refcount);
+	return 0;
+}
+
+static inline void
+kgsl_mem_entry_put(struct kgsl_mem_entry *entry)
+{
+	if (entry)
+		kref_put(&entry->refcount, kgsl_mem_entry_destroy);
+}
+
+/*
+ * kgsl_addr_range_overlap() - Checks if 2 ranges overlap
+ * @gpuaddr1: Start of first address range
+ * @size1: Size of first address range
+ * @gpuaddr2: Start of second address range
+ * @size2: Size of second address range
+ *
+ * Returns true if the two given address ranges overlap, false otherwise.
+ */
+static inline bool kgsl_addr_range_overlap(uint64_t gpuaddr1,
+		uint64_t size1, uint64_t gpuaddr2, uint64_t size2)
+{
+	if ((size1 > (U64_MAX - gpuaddr1)) || (size2 > (U64_MAX - gpuaddr2)))
+		return false;
+	return !(((gpuaddr1 + size1) <= gpuaddr2) ||
+		(gpuaddr1 >= (gpuaddr2 + size2)));
+}
+
+/**
+ * kgsl_malloc() - Use either kzalloc or vmalloc to allocate memory
+ * @size: Size of the desired allocation
+ *
+ * Allocate a block of memory for the driver - if it is small, allocate it
+ * with kzalloc (fast!); otherwise fall back to vmalloc (safe!).
+ */
+static inline void *kgsl_malloc(size_t size)
+{
+	if (size <= PAGE_SIZE)
+		return kzalloc(size, GFP_KERNEL);
+
+	return vmalloc(size);
+}
+
+/**
+ * kgsl_free() - Free memory allocated by kgsl_malloc()
+ * @ptr: Pointer to the memory to free
+ *
+ * Free the memory whether it lives in vmalloc or kmalloc space.
+ */
+static inline void kgsl_free(void *ptr)
+{
+	if (ptr != NULL && is_vmalloc_addr(ptr))
+		return vfree(ptr);
+
+	kfree(ptr);
+}
+
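+/*
+ * _copy_from_user() - Copy the smaller of the kernel and user struct sizes
+ *
+ * Copies min(@ksize, @usize) bytes from @src to @dest.  Returns -EINVAL if
+ * either size is zero and -EFAULT if the copy from userspace fails.
+ */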
+static inline int _copy_from_user(void *dest, void __user *src,
+		unsigned int ksize, unsigned int usize)
+{
+	unsigned int copy = ksize < usize ? ksize : usize;
+
+	if (copy == 0)
+		return -EINVAL;
+
+	return copy_from_user(dest, src, copy) ? -EFAULT : 0;
+}
+
+static inline void __user *to_user_ptr(uint64_t address)
+{
+	return (void __user *)(uintptr_t)address;
+}
+
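+/*
+ * kgsl_gpu_sysfs_add_link() - Create a sysfs symlink between two kobjects
+ *
+ * Look up the directory entry @src_name under @src and link it under @dst
+ * as @dst_name.  Failures are silently ignored.
+ */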
+static inline void kgsl_gpu_sysfs_add_link(struct kobject *dst,
+			struct kobject *src, const char *src_name,
+			const char *dst_name)
+{
+	struct kernfs_node *old;
+
+	if (dst == NULL || src == NULL)
+		return;
+
+	old = sysfs_get_dirent(src->sd, src_name);
+	if (IS_ERR_OR_NULL(old))
+		return;
+
+	kernfs_create_link(dst->sd, dst_name, old);
+}
+#endif /* __KGSL_H */
diff --git a/drivers/gpu/msm/kgsl_compat.c b/drivers/gpu/msm/kgsl_compat.c
new file mode 100644
index 0000000..e0e6a2b
--- /dev/null
+++ b/drivers/gpu/msm/kgsl_compat.c
@@ -0,0 +1,390 @@
+/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/fs.h>
+#include <linux/file.h>
+#include <linux/uaccess.h>
+#include <asm/ioctl.h>
+
+#include "kgsl.h"
+#include "kgsl_compat.h"
+#include "kgsl_device.h"
+#include "kgsl_sync.h"
+
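+/*
+ * Each *_compat handler below widens a 32-bit ioctl structure into its
+ * native counterpart (converting compat_ulong_t/compat_size_t fields and
+ * translating compat_uptr_t with compat_ptr()), calls the native handler,
+ * and copies any output fields back into the 32-bit structure.
+ */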
+static long
+kgsl_ioctl_device_getproperty_compat(struct kgsl_device_private *dev_priv,
+				unsigned int cmd, void *data)
+{
+	struct kgsl_device_getproperty_compat *param32 = data;
+	struct kgsl_device_getproperty param;
+
+	param.type = param32->type;
+	param.value = compat_ptr(param32->value);
+	param.sizebytes = (size_t)param32->sizebytes;
+
+	return kgsl_ioctl_device_getproperty(dev_priv, cmd, &param);
+}
+
+static long
+kgsl_ioctl_device_setproperty_compat(struct kgsl_device_private *dev_priv,
+				unsigned int cmd, void *data)
+{
+	struct kgsl_device_getproperty_compat *param32 = data;
+	struct kgsl_device_getproperty param;
+
+	param.type = param32->type;
+	param.value = compat_ptr(param32->value);
+	param.sizebytes = (size_t)param32->sizebytes;
+
+	return kgsl_ioctl_device_setproperty(dev_priv, cmd, &param);
+}
+
+static long
+kgsl_ioctl_submit_commands_compat(struct kgsl_device_private *dev_priv,
+				      unsigned int cmd, void *data)
+{
+	int result;
+	struct kgsl_submit_commands_compat *param32 = data;
+	struct kgsl_submit_commands param;
+
+	param.context_id = param32->context_id;
+	param.flags = param32->flags;
+	param.cmdlist = compat_ptr(param32->cmdlist);
+	param.numcmds = param32->numcmds;
+	param.synclist = compat_ptr(param32->synclist);
+	param.numsyncs = param32->numsyncs;
+	param.timestamp = param32->timestamp;
+
+	result = kgsl_ioctl_submit_commands(dev_priv, cmd, &param);
+
+	param32->timestamp = param.timestamp;
+
+	return result;
+}
+
+static long
+kgsl_ioctl_rb_issueibcmds_compat(struct kgsl_device_private *dev_priv,
+				      unsigned int cmd, void *data)
+{
+	int result;
+	struct kgsl_ringbuffer_issueibcmds_compat *param32 = data;
+	struct kgsl_ringbuffer_issueibcmds param;
+
+	param.drawctxt_id = param32->drawctxt_id;
+	param.flags = param32->flags;
+	param.ibdesc_addr = (unsigned long)param32->ibdesc_addr;
+	param.numibs = param32->numibs;
+	param.timestamp = param32->timestamp;
+
+	result = kgsl_ioctl_rb_issueibcmds(dev_priv, cmd, &param);
+
+	param32->timestamp = param.timestamp;
+
+	return result;
+}
+
+static long kgsl_ioctl_cmdstream_freememontimestamp_ctxtid_compat(
+						struct kgsl_device_private
+						*dev_priv, unsigned int cmd,
+						void *data)
+{
+	struct kgsl_cmdstream_freememontimestamp_ctxtid_compat *param32 = data;
+	struct kgsl_cmdstream_freememontimestamp_ctxtid param;
+
+	param.context_id = param32->context_id;
+	param.gpuaddr = (unsigned long)param32->gpuaddr;
+	param.type = param32->type;
+	param.timestamp = param32->timestamp;
+
+	return kgsl_ioctl_cmdstream_freememontimestamp_ctxtid(dev_priv, cmd,
+								&param);
+}
+
+static long kgsl_ioctl_sharedmem_free_compat(struct kgsl_device_private
+					*dev_priv, unsigned int cmd,
+					void *data)
+{
+	struct kgsl_sharedmem_free_compat *param32 = data;
+	struct kgsl_sharedmem_free param;
+
+	param.gpuaddr = (unsigned long)param32->gpuaddr;
+
+	return kgsl_ioctl_sharedmem_free(dev_priv, cmd, &param);
+}
+
+static long kgsl_ioctl_map_user_mem_compat(struct kgsl_device_private
+					*dev_priv, unsigned int cmd,
+					void *data)
+{
+	int result = 0;
+	struct kgsl_map_user_mem_compat *param32 = data;
+	struct kgsl_map_user_mem param;
+
+	param.fd = param32->fd;
+	param.gpuaddr = (unsigned long)param32->gpuaddr;
+	param.len = (size_t)param32->len;
+	param.offset = (size_t)param32->offset;
+	param.hostptr = (unsigned long)param32->hostptr;
+	param.memtype = param32->memtype;
+	param.flags = param32->flags;
+
+	result = kgsl_ioctl_map_user_mem(dev_priv, cmd, &param);
+
+	param32->gpuaddr = gpuaddr_to_compat(param.gpuaddr);
+	param32->flags = param.flags;
+	return result;
+}
+
+static long
+kgsl_ioctl_gpumem_sync_cache_compat(struct kgsl_device_private *dev_priv,
+				unsigned int cmd, void *data)
+{
+	struct kgsl_gpumem_sync_cache_compat *param32 = data;
+	struct kgsl_gpumem_sync_cache param;
+
+	param.gpuaddr = (unsigned long)param32->gpuaddr;
+	param.id = param32->id;
+	param.op = param32->op;
+	param.offset = (size_t)param32->offset;
+	param.length = (size_t)param32->length;
+
+	return kgsl_ioctl_gpumem_sync_cache(dev_priv, cmd, &param);
+}
+
+static long
+kgsl_ioctl_gpumem_sync_cache_bulk_compat(struct kgsl_device_private *dev_priv,
+					unsigned int cmd, void *data)
+{
+	struct kgsl_gpumem_sync_cache_bulk_compat *param32 = data;
+	struct kgsl_gpumem_sync_cache_bulk param;
+
+	param.id_list = to_user_ptr(param32->id_list);
+	param.count = param32->count;
+	param.op = param32->op;
+
+	return kgsl_ioctl_gpumem_sync_cache_bulk(dev_priv, cmd, &param);
+}
+
+static long
+kgsl_ioctl_sharedmem_flush_cache_compat(struct kgsl_device_private *dev_priv,
+				 unsigned int cmd, void *data)
+{
+	struct kgsl_sharedmem_free_compat *param32 = data;
+	struct kgsl_sharedmem_free param;
+
+	param.gpuaddr = (unsigned long)param32->gpuaddr;
+
+	return kgsl_ioctl_sharedmem_flush_cache(dev_priv, cmd, &param);
+}
+
+static long
+kgsl_ioctl_gpumem_alloc_compat(struct kgsl_device_private *dev_priv,
+			unsigned int cmd, void *data)
+{
+	int result = 0;
+	struct kgsl_gpumem_alloc_compat *param32 = data;
+	struct kgsl_gpumem_alloc param;
+
+	param.gpuaddr = (unsigned long)param32->gpuaddr;
+	param.size = (size_t)param32->size;
+	param.flags = param32->flags;
+
+	/*
+	 * Since this is a 32 bit application the page aligned size is expected
+	 * to fit inside of 32 bits - check for overflow and return error if so
+	 */
+	if (PAGE_ALIGN(param.size) >= UINT_MAX)
+		return -EINVAL;
+
+	result = kgsl_ioctl_gpumem_alloc(dev_priv, cmd, &param);
+
+	param32->gpuaddr = gpuaddr_to_compat(param.gpuaddr);
+	param32->size = sizet_to_compat(param.size);
+	param32->flags = param.flags;
+
+	return result;
+}
+
+static long
+kgsl_ioctl_gpumem_alloc_id_compat(struct kgsl_device_private *dev_priv,
+			unsigned int cmd, void *data)
+{
+	int result = 0;
+	struct kgsl_gpumem_alloc_id_compat *param32 = data;
+	struct kgsl_gpumem_alloc_id param;
+
+	param.id = param32->id;
+	param.flags = param32->flags;
+	param.size = (size_t)param32->size;
+	param.mmapsize = (size_t)param32->mmapsize;
+	param.gpuaddr = (unsigned long)param32->gpuaddr;
+
+	/*
+	 * Since this is a 32 bit application the page aligned size is expected
+	 * to fit inside of 32 bits - check for overflow and return error if so
+	 */
+	if (PAGE_ALIGN(param.size) >= UINT_MAX)
+		return -EINVAL;
+
+	result = kgsl_ioctl_gpumem_alloc_id(dev_priv, cmd, &param);
+
+	param32->id = param.id;
+	param32->flags = param.flags;
+	param32->size = sizet_to_compat(param.size);
+	param32->mmapsize = sizet_to_compat(param.mmapsize);
+	param32->gpuaddr = gpuaddr_to_compat(param.gpuaddr);
+
+	return result;
+}
+
+static long
+kgsl_ioctl_gpumem_get_info_compat(struct kgsl_device_private *dev_priv,
+				unsigned int cmd, void *data)
+{
+	int result = 0;
+	struct kgsl_gpumem_get_info_compat *param32 = data;
+	struct kgsl_gpumem_get_info param;
+
+	param.gpuaddr = (unsigned long)param32->gpuaddr;
+	param.id = param32->id;
+	param.flags = param32->flags;
+	param.size = (size_t)param32->size;
+	param.mmapsize = (size_t)param32->mmapsize;
+	param.useraddr = (unsigned long)param32->useraddr;
+
+	result = kgsl_ioctl_gpumem_get_info(dev_priv, cmd, &param);
+
+	param32->gpuaddr = gpuaddr_to_compat(param.gpuaddr);
+	param32->id = param.id;
+	param32->flags = param.flags;
+	param32->size = sizet_to_compat(param.size);
+	param32->mmapsize = sizet_to_compat(param.mmapsize);
+	param32->useraddr = (compat_ulong_t)param.useraddr;
+
+	return result;
+}
+
+static long kgsl_ioctl_timestamp_event_compat(struct kgsl_device_private
+				*dev_priv, unsigned int cmd, void *data)
+{
+	struct kgsl_timestamp_event_compat *param32 = data;
+	struct kgsl_timestamp_event param;
+
+	param.type = param32->type;
+	param.timestamp = param32->timestamp;
+	param.context_id = param32->context_id;
+	param.priv = compat_ptr(param32->priv);
+	param.len = (size_t)param32->len;
+
+	return kgsl_ioctl_timestamp_event(dev_priv, cmd, &param);
+}
+
+
+static const struct kgsl_ioctl kgsl_compat_ioctl_funcs[] = {
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_DEVICE_GETPROPERTY_COMPAT,
+			kgsl_ioctl_device_getproperty_compat),
+	/* IOCTL_KGSL_DEVICE_WAITTIMESTAMP is no longer supported */
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID,
+			kgsl_ioctl_device_waittimestamp_ctxtid),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_RINGBUFFER_ISSUEIBCMDS_COMPAT,
+			kgsl_ioctl_rb_issueibcmds_compat),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_SUBMIT_COMMANDS_COMPAT,
+			kgsl_ioctl_submit_commands_compat),
+	/* IOCTL_KGSL_CMDSTREAM_READTIMESTAMP is no longer supported */
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_READTIMESTAMP_CTXTID,
+			kgsl_ioctl_cmdstream_readtimestamp_ctxtid),
+	/* IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP is no longer supported */
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP_CTXTID_COMPAT,
+			kgsl_ioctl_cmdstream_freememontimestamp_ctxtid_compat),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_DRAWCTXT_CREATE,
+			kgsl_ioctl_drawctxt_create),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_DRAWCTXT_DESTROY,
+			kgsl_ioctl_drawctxt_destroy),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_MAP_USER_MEM_COMPAT,
+			kgsl_ioctl_map_user_mem_compat),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_SHAREDMEM_FREE_COMPAT,
+			kgsl_ioctl_sharedmem_free_compat),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_SHAREDMEM_FLUSH_CACHE_COMPAT,
+			kgsl_ioctl_sharedmem_flush_cache_compat),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_ALLOC_COMPAT,
+			kgsl_ioctl_gpumem_alloc_compat),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMESTAMP_EVENT_COMPAT,
+			kgsl_ioctl_timestamp_event_compat),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_SETPROPERTY_COMPAT,
+			kgsl_ioctl_device_setproperty_compat),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_ALLOC_ID_COMPAT,
+			kgsl_ioctl_gpumem_alloc_id_compat),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_FREE_ID,
+			kgsl_ioctl_gpumem_free_id),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_GET_INFO_COMPAT,
+			kgsl_ioctl_gpumem_get_info_compat),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_SYNC_CACHE_COMPAT,
+			kgsl_ioctl_gpumem_sync_cache_compat),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_SYNC_CACHE_BULK_COMPAT,
+			kgsl_ioctl_gpumem_sync_cache_bulk_compat),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_SYNCSOURCE_CREATE,
+			kgsl_ioctl_syncsource_create),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_SYNCSOURCE_DESTROY,
+			kgsl_ioctl_syncsource_destroy),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_SYNCSOURCE_CREATE_FENCE,
+			kgsl_ioctl_syncsource_create_fence),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_SYNCSOURCE_SIGNAL_FENCE,
+			kgsl_ioctl_syncsource_signal_fence),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_ALLOC,
+			kgsl_ioctl_gpuobj_alloc),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_FREE,
+			kgsl_ioctl_gpuobj_free),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_INFO,
+			kgsl_ioctl_gpuobj_info),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_IMPORT,
+			kgsl_ioctl_gpuobj_import),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_SYNC,
+			kgsl_ioctl_gpuobj_sync),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_GPU_COMMAND,
+			kgsl_ioctl_gpu_command),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_SET_INFO,
+			kgsl_ioctl_gpuobj_set_info),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_SPARSE_PHYS_ALLOC,
+			kgsl_ioctl_sparse_phys_alloc),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_SPARSE_PHYS_FREE,
+			kgsl_ioctl_sparse_phys_free),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_SPARSE_VIRT_ALLOC,
+			kgsl_ioctl_sparse_virt_alloc),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_SPARSE_VIRT_FREE,
+			kgsl_ioctl_sparse_virt_free),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_SPARSE_BIND,
+			kgsl_ioctl_sparse_bind),
+};
+
+long kgsl_compat_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
+{
+	struct kgsl_device_private *dev_priv = filep->private_data;
+	struct kgsl_device *device = dev_priv->device;
+
+	long ret = kgsl_ioctl_helper(filep, cmd, arg, kgsl_compat_ioctl_funcs,
+		ARRAY_SIZE(kgsl_compat_ioctl_funcs));
+
+	/*
+	 * If the command was unrecognized in the generic core, try the device
+	 * specific function
+	 */
+
+	if (ret == -ENOIOCTLCMD) {
+		if (device->ftbl->compat_ioctl != NULL)
+			return device->ftbl->compat_ioctl(dev_priv, cmd, arg);
+
+		KGSL_DRV_INFO(device, "invalid ioctl code 0x%08X\n", cmd);
+	}
+
+	return ret;
+}
diff --git a/drivers/gpu/msm/kgsl_compat.h b/drivers/gpu/msm/kgsl_compat.h
new file mode 100644
index 0000000..621b232
--- /dev/null
+++ b/drivers/gpu/msm/kgsl_compat.h
@@ -0,0 +1,263 @@
+/* Copyright (c) 2013-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef __KGSL_COMPAT_H
+#define __KGSL_COMPAT_H
+
+#ifdef CONFIG_COMPAT
+#include <linux/compat.h>
+#include "kgsl.h"
+#include "kgsl_device.h"
+
+struct kgsl_ibdesc_compat {
+	compat_ulong_t gpuaddr;
+	unsigned int __pad;
+	compat_size_t sizedwords;
+	unsigned int ctrl;
+};
+
+struct kgsl_cmd_syncpoint_compat {
+	int type;
+	compat_uptr_t priv;
+	compat_size_t size;
+};
+
+struct kgsl_devinfo_compat {
+	unsigned int device_id;
+	unsigned int chip_id;
+	unsigned int mmu_enabled;
+	compat_ulong_t gmem_gpubaseaddr;
+	unsigned int gpu_id;
+	compat_size_t gmem_sizebytes;
+};
+
+struct kgsl_shadowprop_compat {
+	compat_ulong_t gpuaddr;
+	compat_size_t size;
+	unsigned int flags;
+};
+
+struct kgsl_device_constraint_compat {
+	unsigned int type;
+	unsigned int context_id;
+	compat_uptr_t data;
+	compat_size_t size;
+};
+
+struct kgsl_device_getproperty_compat {
+	unsigned int type;
+	compat_uptr_t value;
+	compat_size_t sizebytes;
+};
+
+#define IOCTL_KGSL_DEVICE_GETPROPERTY_COMPAT \
+	_IOWR(KGSL_IOC_TYPE, 0x2, struct kgsl_device_getproperty_compat)
+
+#define IOCTL_KGSL_SETPROPERTY_COMPAT \
+	_IOW(KGSL_IOC_TYPE, 0x32, struct kgsl_device_getproperty_compat)
+
+
+struct kgsl_submit_commands_compat {
+	unsigned int context_id;
+	unsigned int flags;
+	compat_uptr_t cmdlist;
+	unsigned int numcmds;
+	compat_uptr_t synclist;
+	unsigned int numsyncs;
+	unsigned int timestamp;
+/* private: reserved for future use */
+	unsigned int __pad[4];
+};
+
+#define IOCTL_KGSL_SUBMIT_COMMANDS_COMPAT \
+	_IOWR(KGSL_IOC_TYPE, 0x3D, struct kgsl_submit_commands_compat)
+
+struct kgsl_ringbuffer_issueibcmds_compat {
+	unsigned int drawctxt_id;
+	compat_ulong_t ibdesc_addr;
+	unsigned int numibs;
+	unsigned int timestamp; /* output param */
+	unsigned int flags;
+};
+
+#define IOCTL_KGSL_RINGBUFFER_ISSUEIBCMDS_COMPAT \
+	_IOWR(KGSL_IOC_TYPE, 0x10, struct kgsl_ringbuffer_issueibcmds_compat)
+
+struct kgsl_cmdstream_freememontimestamp_ctxtid_compat {
+	unsigned int context_id;
+	compat_ulong_t gpuaddr;
+	unsigned int type;
+	unsigned int timestamp;
+};
+
+#define IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP_CTXTID_COMPAT \
+	_IOW(KGSL_IOC_TYPE, 0x17, \
+	struct kgsl_cmdstream_freememontimestamp_ctxtid_compat)
+
+struct kgsl_map_user_mem_compat {
+	int fd;
+	compat_ulong_t gpuaddr;
+	compat_size_t len;
+	compat_size_t offset;
+	compat_ulong_t hostptr;
+	enum kgsl_user_mem_type memtype;
+	unsigned int flags;
+};
+
+#define IOCTL_KGSL_MAP_USER_MEM_COMPAT \
+	_IOWR(KGSL_IOC_TYPE, 0x15, struct kgsl_map_user_mem_compat)
+
+struct kgsl_sharedmem_free_compat {
+	compat_ulong_t gpuaddr;
+};
+
+#define IOCTL_KGSL_SHAREDMEM_FLUSH_CACHE_COMPAT \
+	_IOW(KGSL_IOC_TYPE, 0x24, struct kgsl_sharedmem_free_compat)
+
+#define IOCTL_KGSL_SHAREDMEM_FREE_COMPAT \
+	_IOW(KGSL_IOC_TYPE, 0x21, struct kgsl_sharedmem_free_compat)
+
+struct kgsl_gpumem_alloc_compat {
+	compat_ulong_t gpuaddr; /* output param */
+	compat_size_t size;
+	unsigned int flags;
+};
+
+#define IOCTL_KGSL_GPUMEM_ALLOC_COMPAT \
+	_IOWR(KGSL_IOC_TYPE, 0x2f, struct kgsl_gpumem_alloc_compat)
+
+struct kgsl_cff_syncmem_compat {
+	compat_ulong_t gpuaddr;
+	compat_size_t len;
+	unsigned int __pad[2]; /* For future binary compatibility */
+};
+
+#define IOCTL_KGSL_CFF_SYNCMEM_COMPAT \
+	_IOW(KGSL_IOC_TYPE, 0x30, struct kgsl_cff_syncmem_compat)
+
+struct kgsl_timestamp_event_compat {
+	int type;                /* Type of event (see list below) */
+	unsigned int timestamp;  /* Timestamp to trigger event on */
+	unsigned int context_id; /* Context for the timestamp */
+	compat_uptr_t priv;      /* Pointer to the event specific blob */
+	compat_size_t len;       /* Size of the event specific blob */
+};
+
+#define IOCTL_KGSL_TIMESTAMP_EVENT_COMPAT \
+	_IOWR(KGSL_IOC_TYPE, 0x33, struct kgsl_timestamp_event_compat)
+
+struct kgsl_gpumem_alloc_id_compat {
+	unsigned int id;
+	unsigned int flags;
+	compat_size_t size;
+	compat_size_t mmapsize;
+	compat_ulong_t gpuaddr;
+/* private: reserved for future use*/
+	unsigned int __pad[2];
+};
+
+#define IOCTL_KGSL_GPUMEM_ALLOC_ID_COMPAT \
+	_IOWR(KGSL_IOC_TYPE, 0x34, struct kgsl_gpumem_alloc_id_compat)
+
+struct kgsl_gpumem_get_info_compat {
+	compat_ulong_t gpuaddr;
+	unsigned int id;
+	unsigned int flags;
+	compat_size_t size;
+	compat_size_t mmapsize;
+	compat_ulong_t useraddr;
+/* private: reserved for future use*/
+	unsigned int __pad[4];
+};
+
+#define IOCTL_KGSL_GPUMEM_GET_INFO_COMPAT \
+	_IOWR(KGSL_IOC_TYPE, 0x36, struct kgsl_gpumem_get_info_compat)
+
+struct kgsl_gpumem_sync_cache_compat {
+	compat_ulong_t gpuaddr;
+	unsigned int id;
+	unsigned int op;
+	compat_size_t offset;
+	compat_size_t length;
+};
+
+#define IOCTL_KGSL_GPUMEM_SYNC_CACHE_COMPAT \
+	_IOW(KGSL_IOC_TYPE, 0x37, struct kgsl_gpumem_sync_cache_compat)
+
+struct kgsl_gpumem_sync_cache_bulk_compat {
+	compat_uptr_t id_list;
+	unsigned int count;
+	unsigned int op;
+/* private: reserved for future use */
+	unsigned int __pad[2]; /* For future binary compatibility */
+};
+
+#define IOCTL_KGSL_GPUMEM_SYNC_CACHE_BULK_COMPAT \
+	_IOWR(KGSL_IOC_TYPE, 0x3C, struct kgsl_gpumem_sync_cache_bulk_compat)
+
+struct kgsl_perfcounter_query_compat {
+	unsigned int groupid;
+	compat_uptr_t countables;
+	unsigned int count;
+	unsigned int max_counters;
+	unsigned int __pad[2];
+};
+
+#define IOCTL_KGSL_PERFCOUNTER_QUERY_COMPAT \
+	_IOWR(KGSL_IOC_TYPE, 0x3A, struct kgsl_perfcounter_query_compat)
+
+struct kgsl_perfcounter_read_compat {
+	compat_uptr_t reads;
+	unsigned int count;
+	unsigned int __pad[2];
+};
+
+#define IOCTL_KGSL_PERFCOUNTER_READ_COMPAT \
+	_IOWR(KGSL_IOC_TYPE, 0x3B, struct kgsl_perfcounter_read_compat)
+
+static inline compat_ulong_t gpuaddr_to_compat(unsigned long gpuaddr)
+{
+	WARN(gpuaddr >> 32, "Top 32 bits of gpuaddr have been set\n");
+	return (compat_ulong_t)gpuaddr;
+}
+
+static inline compat_size_t sizet_to_compat(size_t size)
+{
+	WARN(size >> 32, "Size greater than 4G\n");
+	return (compat_size_t)size;
+}
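+
+/*
+ * Illustrative sketch only (not part of this patch): when servicing a
+ * compat ioctl, 64-bit kernel values are narrowed with the helpers above
+ * before being copied back to the 32-bit user structure. Field and
+ * variable names below are placeholders:
+ *
+ *	struct kgsl_gpumem_get_info_compat param32;
+ *
+ *	param32.gpuaddr = gpuaddr_to_compat(gpuaddr);
+ *	param32.size = sizet_to_compat(size);
+ *
+ * The WARN()s above fire if a value does not actually fit in 32 bits.
+ */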
+
+int kgsl_drawobj_create_compat(struct kgsl_device *device, unsigned int flags,
+			struct kgsl_drawobj *drawobj, void __user *cmdlist,
+			unsigned int numcmds, void __user *synclist,
+			unsigned int numsyncs);
+
+long kgsl_compat_ioctl(struct file *filep, unsigned int cmd,
+			unsigned long arg);
+
+#else
+static inline int kgsl_drawobj_create_compat(struct kgsl_device *device,
+			unsigned int flags, struct kgsl_drawobj *drawobj,
+			void __user *cmdlist, unsigned int numcmds,
+			void __user *synclist, unsigned int numsyncs)
+{
+	return -EINVAL;
+}
+
+static inline long kgsl_compat_ioctl(struct file *filep, unsigned int cmd,
+			unsigned long arg)
+{
+	return -EINVAL;
+}
+
+#endif /* CONFIG_COMPAT */
+#endif /* __KGSL_COMPAT_H */
diff --git a/drivers/gpu/msm/kgsl_debugfs.c b/drivers/gpu/msm/kgsl_debugfs.c
new file mode 100644
index 0000000..659ea46
--- /dev/null
+++ b/drivers/gpu/msm/kgsl_debugfs.c
@@ -0,0 +1,449 @@
+/* Copyright (c) 2002,2008-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/debugfs.h>
+
+#include "kgsl.h"
+#include "kgsl_device.h"
+#include "kgsl_sharedmem.h"
+#include "kgsl_debugfs.h"
+
+/* Default log level is error for everything */
+#define KGSL_LOG_LEVEL_MAX     7
+
+struct dentry *kgsl_debugfs_dir;
+static struct dentry *proc_d_debugfs;
+
+static inline int kgsl_log_set(unsigned int *log_val, void *data, u64 val)
+{
+	*log_val = min_t(unsigned int, val, KGSL_LOG_LEVEL_MAX);
+	return 0;
+}
+
+#define KGSL_DEBUGFS_LOG(__log)                         \
+static int __log ## _set(void *data, u64 val)           \
+{                                                       \
+	struct kgsl_device *device = data;              \
+	return kgsl_log_set(&device->__log, data, val); \
+}                                                       \
+static int __log ## _get(void *data, u64 *val)	        \
+{                                                       \
+	struct kgsl_device *device = data;              \
+	*val = device->__log;                           \
+	return 0;                                       \
+}                                                       \
+DEFINE_SIMPLE_ATTRIBUTE(__log ## _fops,                 \
+__log ## _get, __log ## _set, "%llu\n")                 \
+
+KGSL_DEBUGFS_LOG(drv_log);
+KGSL_DEBUGFS_LOG(cmd_log);
+KGSL_DEBUGFS_LOG(ctxt_log);
+KGSL_DEBUGFS_LOG(mem_log);
+KGSL_DEBUGFS_LOG(pwr_log);
+
+static int _strict_set(void *data, u64 val)
+{
+	kgsl_sharedmem_set_noretry(val ? true : false);
+	return 0;
+}
+
+static int _strict_get(void *data, u64 *val)
+{
+	*val = kgsl_sharedmem_get_noretry();
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(_strict_fops, _strict_get, _strict_set, "%llu\n");
+
+void kgsl_device_debugfs_init(struct kgsl_device *device)
+{
+	if (kgsl_debugfs_dir && !IS_ERR(kgsl_debugfs_dir))
+		device->d_debugfs = debugfs_create_dir(device->name,
+						       kgsl_debugfs_dir);
+
+	if (!device->d_debugfs || IS_ERR(device->d_debugfs))
+		return;
+
+	debugfs_create_file("log_level_cmd", 0644, device->d_debugfs, device,
+			    &cmd_log_fops);
+	debugfs_create_file("log_level_ctxt", 0644, device->d_debugfs, device,
+			    &ctxt_log_fops);
+	debugfs_create_file("log_level_drv", 0644, device->d_debugfs, device,
+			    &drv_log_fops);
+	debugfs_create_file("log_level_mem", 0644, device->d_debugfs, device,
+				&mem_log_fops);
+	debugfs_create_file("log_level_pwr", 0644, device->d_debugfs, device,
+				&pwr_log_fops);
+}
+
+struct type_entry {
+	int type;
+	const char *str;
+};
+
+static const struct type_entry memtypes[] = { KGSL_MEM_TYPES };
+
+static const char *memtype_str(int memtype)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(memtypes); i++)
+		if (memtypes[i].type == memtype)
+			return memtypes[i].str;
+	return "unknown";
+}
+
+static char get_alignflag(const struct kgsl_memdesc *m)
+{
+	int align = kgsl_memdesc_get_align(m);
+
+	if (align >= ilog2(SZ_1M))
+		return 'L';
+	else if (align >= ilog2(SZ_64K))
+		return 'l';
+	return '-';
+}
+
+static char get_cacheflag(const struct kgsl_memdesc *m)
+{
+	static const char table[] = {
+		[KGSL_CACHEMODE_WRITECOMBINE] = '-',
+		[KGSL_CACHEMODE_UNCACHED] = 'u',
+		[KGSL_CACHEMODE_WRITEBACK] = 'b',
+		[KGSL_CACHEMODE_WRITETHROUGH] = 't',
+	};
+
+	return table[kgsl_memdesc_get_cachemode(m)];
+}
+
+
+static int print_mem_entry(void *data, void *ptr)
+{
+	struct seq_file *s = data;
+	struct kgsl_mem_entry *entry = ptr;
+	char flags[10];
+	char usage[16];
+	struct kgsl_memdesc *m = &entry->memdesc;
+	unsigned int usermem_type = kgsl_memdesc_usermem_type(m);
+	int egl_surface_count = 0, egl_image_count = 0;
+
+	if (m->flags & KGSL_MEMFLAGS_SPARSE_VIRT)
+		return 0;
+
+	flags[0] = kgsl_memdesc_is_global(m) ?  'g' : '-';
+	flags[1] = '-';
+	flags[2] = !(m->flags & KGSL_MEMFLAGS_GPUREADONLY) ? 'w' : '-';
+	flags[3] = get_alignflag(m);
+	flags[4] = get_cacheflag(m);
+	flags[5] = kgsl_memdesc_use_cpu_map(m) ? 'p' : '-';
+	flags[6] = (m->useraddr) ? 'Y' : 'N';
+	flags[7] = kgsl_memdesc_is_secured(m) ?  's' : '-';
+	flags[8] = m->flags & KGSL_MEMFLAGS_SPARSE_PHYS ? 'P' : '-';
+	flags[9] = '\0';
+
+	kgsl_get_memory_usage(usage, sizeof(usage), m->flags);
+
+	if (usermem_type == KGSL_MEM_ENTRY_ION)
+		kgsl_get_egl_counts(entry, &egl_surface_count,
+						&egl_image_count);
+
+	seq_printf(s, "%pK %pK %16llu %5d %9s %10s %16s %5d %16llu %6d %6d",
+			(uint64_t *)(uintptr_t) m->gpuaddr,
+			(unsigned long *) m->useraddr,
+			m->size, entry->id, flags,
+			memtype_str(usermem_type),
+			usage, (m->sgt ? m->sgt->nents : 0), m->mapsize,
+			egl_surface_count, egl_image_count);
+
+	if (entry->metadata[0] != 0)
+		seq_printf(s, " %s", entry->metadata);
+
+	seq_putc(s, '\n');
+
+	return 0;
+}
+
+static struct kgsl_mem_entry *process_mem_seq_find(struct seq_file *s,
+						void *ptr, loff_t pos)
+{
+	struct kgsl_mem_entry *entry = ptr;
+	struct kgsl_process_private *private = s->private;
+	int id = 0;
+
+	loff_t temp_pos = 1;
+
+	if (entry != SEQ_START_TOKEN)
+		id = entry->id + 1;
+
+	spin_lock(&private->mem_lock);
+	for (entry = idr_get_next(&private->mem_idr, &id); entry;
+		id++, entry = idr_get_next(&private->mem_idr, &id),
+							temp_pos++) {
+		if (temp_pos == pos && kgsl_mem_entry_get(entry)) {
+			spin_unlock(&private->mem_lock);
+			goto found;
+		}
+	}
+	spin_unlock(&private->mem_lock);
+
+	entry = NULL;
+found:
+	if (ptr != SEQ_START_TOKEN)
+		kgsl_mem_entry_put(ptr);
+
+	return entry;
+}
+
+static void *process_mem_seq_start(struct seq_file *s, loff_t *pos)
+{
+	loff_t seq_file_offset = *pos;
+
+	if (seq_file_offset == 0)
+		return SEQ_START_TOKEN;
+	else
+		return process_mem_seq_find(s, SEQ_START_TOKEN,
+						seq_file_offset);
+}
+
+static void process_mem_seq_stop(struct seq_file *s, void *ptr)
+{
+	if (ptr && ptr != SEQ_START_TOKEN)
+		kgsl_mem_entry_put(ptr);
+}
+
+static void *process_mem_seq_next(struct seq_file *s, void *ptr,
+							loff_t *pos)
+{
+	++*pos;
+	return process_mem_seq_find(s, ptr, 1);
+}
+
+static int process_mem_seq_show(struct seq_file *s, void *ptr)
+{
+	if (ptr == SEQ_START_TOKEN) {
+		seq_printf(s, "%16s %16s %16s %5s %9s %10s %16s %5s %16s %6s %6s\n",
+			"gpuaddr", "useraddr", "size", "id", "flags", "type",
+			"usage", "sglen", "mapsize", "eglsrf", "eglimg");
+		return 0;
+	} else
+		return print_mem_entry(s, ptr);
+}
+
+static const struct seq_operations process_mem_seq_fops = {
+	.start = process_mem_seq_start,
+	.stop = process_mem_seq_stop,
+	.next = process_mem_seq_next,
+	.show = process_mem_seq_show,
+};
+
+static int process_mem_open(struct inode *inode, struct file *file)
+{
+	int ret;
+	pid_t pid = (pid_t) (unsigned long) inode->i_private;
+	struct seq_file *s = NULL;
+	struct kgsl_process_private *private = NULL;
+
+	private = kgsl_process_private_find(pid);
+
+	if (!private)
+		return -ENODEV;
+
+	ret = seq_open(file, &process_mem_seq_fops);
+	if (ret)
+		kgsl_process_private_put(private);
+	else {
+		s = file->private_data;
+		s->private = private;
+	}
+
+	return ret;
+}
+
+static int process_mem_release(struct inode *inode, struct file *file)
+{
+	struct kgsl_process_private *private =
+		((struct seq_file *)file->private_data)->private;
+
+	if (private)
+		kgsl_process_private_put(private);
+
+	return seq_release(inode, file);
+}
+
+static const struct file_operations process_mem_fops = {
+	.open = process_mem_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = process_mem_release,
+};
+
+static int print_sparse_mem_entry(int id, void *ptr, void *data)
+{
+	struct seq_file *s = data;
+	struct kgsl_mem_entry *entry = ptr;
+	struct kgsl_memdesc *m = &entry->memdesc;
+	struct rb_node *node;
+
+	if (!(m->flags & KGSL_MEMFLAGS_SPARSE_VIRT))
+		return 0;
+
+	node = rb_first(&entry->bind_tree);
+
+	while (node != NULL) {
+		struct sparse_bind_object *obj = rb_entry(node,
+				struct sparse_bind_object, node);
+		seq_printf(s, "%5d %16llx %16llx %16llx %16llx\n",
+				entry->id, entry->memdesc.gpuaddr,
+				obj->v_off, obj->size, obj->p_off);
+		node = rb_next(node);
+	}
+
+	seq_putc(s, '\n');
+
+	return 0;
+}
+
+static int process_sparse_mem_print(struct seq_file *s, void *unused)
+{
+	struct kgsl_process_private *private = s->private;
+
+	seq_printf(s, "%5s %16s %16s %16s %16s\n",
+		   "v_id", "gpuaddr", "v_offset", "v_size", "p_offset");
+
+	spin_lock(&private->mem_lock);
+	idr_for_each(&private->mem_idr, print_sparse_mem_entry, s);
+	spin_unlock(&private->mem_lock);
+
+	return 0;
+}
+
+static int process_sparse_mem_open(struct inode *inode, struct file *file)
+{
+	int ret;
+	pid_t pid = (pid_t) (unsigned long) inode->i_private;
+	struct kgsl_process_private *private = NULL;
+
+	private = kgsl_process_private_find(pid);
+
+	if (!private)
+		return -ENODEV;
+
+	ret = single_open(file, process_sparse_mem_print, private);
+	if (ret)
+		kgsl_process_private_put(private);
+
+	return ret;
+}
+
+static const struct file_operations process_sparse_mem_fops = {
+	.open = process_sparse_mem_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = process_mem_release,
+};
+
+static int globals_print(struct seq_file *s, void *unused)
+{
+	kgsl_print_global_pt_entries(s);
+	return 0;
+}
+
+static int globals_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, globals_print, NULL);
+}
+
+static int globals_release(struct inode *inode, struct file *file)
+{
+	return single_release(inode, file);
+}
+
+static const struct file_operations global_fops = {
+	.open = globals_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = globals_release,
+};
+
+/**
+ * kgsl_process_init_debugfs() - Initialize debugfs for a process
+ * @private: Pointer to process private structure created for the process
+ *
+ * kgsl_process_init_debugfs() is called at the time of creating the
+ * process struct when a process opens the kgsl device for the first time.
+ * Failures in this function are not fatal - all we do is print a warning
+ * message if the files can't be created
+ */
+void kgsl_process_init_debugfs(struct kgsl_process_private *private)
+{
+	unsigned char name[16];
+	struct dentry *dentry;
+
+	snprintf(name, sizeof(name), "%d", private->pid);
+
+	private->debug_root = debugfs_create_dir(name, proc_d_debugfs);
+
+	/*
+	 * Both debugfs_create_dir() and debugfs_create_file() return
+	 * ERR_PTR(-ENODEV) if debugfs is disabled in the kernel but return
+	 * NULL on error when it is enabled. For both usages we need to check
+	 * for ERROR or NULL and only print a warning on an actual failure
+	 * (i.e. - when the return value is NULL)
+	 */
+
+	if (IS_ERR_OR_NULL(private->debug_root)) {
+		WARN((private->debug_root == NULL),
+			"Unable to create debugfs dir for %s\n", name);
+		private->debug_root = NULL;
+		return;
+	}
+
+	dentry = debugfs_create_file("mem", 0444, private->debug_root,
+		(void *) ((unsigned long) private->pid), &process_mem_fops);
+
+	if (IS_ERR_OR_NULL(dentry))
+		WARN((dentry == NULL),
+			"Unable to create 'mem' file for %s\n", name);
+
+	dentry = debugfs_create_file("sparse_mem", 0444, private->debug_root,
+		(void *) ((unsigned long) private->pid),
+		&process_sparse_mem_fops);
+
+	if (IS_ERR_OR_NULL(dentry))
+		WARN((dentry == NULL),
+			"Unable to create 'sparse_mem' file for %s\n", name);
+
+}
+
+void kgsl_core_debugfs_init(void)
+{
+	struct dentry *debug_dir;
+
+	kgsl_debugfs_dir = debugfs_create_dir("kgsl", NULL);
+
+	debugfs_create_file("globals", 0444, kgsl_debugfs_dir, NULL,
+		&global_fops);
+
+	debug_dir = debugfs_create_dir("debug", kgsl_debugfs_dir);
+
+	debugfs_create_file("strict_memory", 0644, debug_dir, NULL,
+		&_strict_fops);
+
+	proc_d_debugfs = debugfs_create_dir("proc", kgsl_debugfs_dir);
+}
+
+void kgsl_core_debugfs_close(void)
+{
+	debugfs_remove_recursive(kgsl_debugfs_dir);
+}
diff --git a/drivers/gpu/msm/kgsl_debugfs.h b/drivers/gpu/msm/kgsl_debugfs.h
new file mode 100644
index 0000000..b5173f2
--- /dev/null
+++ b/drivers/gpu/msm/kgsl_debugfs.h
@@ -0,0 +1,44 @@
+/* Copyright (c) 2002,2008-2011,2013,2015,2017 The Linux Foundation.
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _KGSL_DEBUGFS_H
+#define _KGSL_DEBUGFS_H
+
+struct kgsl_device;
+struct kgsl_process_private;
+
+#ifdef CONFIG_DEBUG_FS
+void kgsl_core_debugfs_init(void);
+void kgsl_core_debugfs_close(void);
+
+void kgsl_device_debugfs_init(struct kgsl_device *device);
+
+extern struct dentry *kgsl_debugfs_dir;
+static inline struct dentry *kgsl_get_debugfs_dir(void)
+{
+	return kgsl_debugfs_dir;
+}
+
+void kgsl_process_init_debugfs(struct kgsl_process_private *priv);
+#else
+static inline void kgsl_core_debugfs_init(void) { }
+static inline void kgsl_device_debugfs_init(struct kgsl_device *device) { }
+static inline void kgsl_core_debugfs_close(void) { }
+static inline struct dentry *kgsl_get_debugfs_dir(void) { return NULL; }
+static inline void kgsl_process_init_debugfs(struct kgsl_process_private *priv)
+{
+}
+#endif
+
+#endif
diff --git a/drivers/gpu/msm/kgsl_device.h b/drivers/gpu/msm/kgsl_device.h
new file mode 100644
index 0000000..ed3f78a
--- /dev/null
+++ b/drivers/gpu/msm/kgsl_device.h
@@ -0,0 +1,879 @@
+/* Copyright (c) 2002,2007-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef __KGSL_DEVICE_H
+#define __KGSL_DEVICE_H
+
+#include <linux/slab.h>
+#include <linux/idr.h>
+#include <linux/pm_qos.h>
+#include <linux/sched.h>
+
+#include "kgsl.h"
+#include "kgsl_mmu.h"
+#include "kgsl_pwrctrl.h"
+#include "kgsl_log.h"
+#include "kgsl_pwrscale.h"
+#include "kgsl_snapshot.h"
+#include "kgsl_sharedmem.h"
+#include "kgsl_drawobj.h"
+
+#define KGSL_IOCTL_FUNC(_cmd, _func) \
+	[_IOC_NR((_cmd))] = \
+		{ .cmd = (_cmd), .func = (_func) }
+
+/*
+ * KGSL device state is initialized to INIT when platform_probe
+ * successfully initializes the device.  Once a device has been opened
+ * (started) it becomes active.  NAP implies that only low latency
+ * resources (for now clocks on some platforms) are off.  SLEEP implies
+ * that the KGSL module believes a device is idle (has been inactive
+ * past its timer) and all system resources are released.  SUSPEND is
+ * requested by the kernel and will be enforced upon all open devices.
+ */
+
+#define KGSL_STATE_NONE		0x00000000
+#define KGSL_STATE_INIT		0x00000001
+#define KGSL_STATE_ACTIVE	0x00000002
+#define KGSL_STATE_NAP		0x00000004
+#define KGSL_STATE_SUSPEND	0x00000010
+#define KGSL_STATE_AWARE	0x00000020
+#define KGSL_STATE_SLUMBER	0x00000080
+
+/**
+ * enum kgsl_event_results - result codes passed to an event callback when the
+ * event is retired or cancelled
+ * @KGSL_EVENT_RETIRED: The timestamp associated with the event retired
+ * successfully
+ * @KGSL_EVENT_CANCELLED: The event was cancelled before the event was fired
+ */
+enum kgsl_event_results {
+	KGSL_EVENT_RETIRED = 1,
+	KGSL_EVENT_CANCELLED = 2,
+};
+
+#define KGSL_FLAG_WAKE_ON_TOUCH BIT(0)
+
+/*
+ * "list" of event types for ftrace symbolic magic
+ */
+
+#define KGSL_EVENT_TYPES \
+	{ KGSL_EVENT_RETIRED, "retired" }, \
+	{ KGSL_EVENT_CANCELLED, "cancelled" }
+
+#define KGSL_CONTEXT_FLAGS \
+	{ KGSL_CONTEXT_NO_GMEM_ALLOC, "NO_GMEM_ALLOC" }, \
+	{ KGSL_CONTEXT_PREAMBLE, "PREAMBLE" }, \
+	{ KGSL_CONTEXT_TRASH_STATE, "TRASH_STATE" }, \
+	{ KGSL_CONTEXT_CTX_SWITCH, "CTX_SWITCH" }, \
+	{ KGSL_CONTEXT_PER_CONTEXT_TS, "PER_CONTEXT_TS" }, \
+	{ KGSL_CONTEXT_USER_GENERATED_TS, "USER_TS" }, \
+	{ KGSL_CONTEXT_NO_FAULT_TOLERANCE, "NO_FT" }, \
+	{ KGSL_CONTEXT_PWR_CONSTRAINT, "PWR" }, \
+	{ KGSL_CONTEXT_SAVE_GMEM, "SAVE_GMEM" }
+
+#define KGSL_CONTEXT_TYPES \
+	{ KGSL_CONTEXT_TYPE_ANY, "ANY" }, \
+	{ KGSL_CONTEXT_TYPE_GL, "GL" }, \
+	{ KGSL_CONTEXT_TYPE_CL, "CL" }, \
+	{ KGSL_CONTEXT_TYPE_C2D, "C2D" }, \
+	{ KGSL_CONTEXT_TYPE_RS, "RS" }
+
+#define KGSL_CONTEXT_ID(_context) \
+	((_context != NULL) ? (_context)->id : KGSL_MEMSTORE_GLOBAL)
+
+/* Allocate 600K for the snapshot static region */
+#define KGSL_SNAPSHOT_MEMSIZE (600 * 1024)
+
+struct kgsl_device;
+struct platform_device;
+struct kgsl_device_private;
+struct kgsl_context;
+struct kgsl_power_stats;
+struct kgsl_event;
+struct kgsl_snapshot;
+
+struct kgsl_functable {
+	/* Mandatory functions - these functions must be implemented
+	 * by the client device.  The driver will not check for a NULL
+	 * pointer before calling the hook.
+	 */
+	void (*regread)(struct kgsl_device *device,
+		unsigned int offsetwords, unsigned int *value);
+	void (*regwrite)(struct kgsl_device *device,
+		unsigned int offsetwords, unsigned int value);
+	int (*idle)(struct kgsl_device *device);
+	bool (*isidle)(struct kgsl_device *device);
+	int (*suspend_context)(struct kgsl_device *device);
+	int (*init)(struct kgsl_device *device);
+	int (*start)(struct kgsl_device *device, int priority);
+	int (*stop)(struct kgsl_device *device);
+	int (*getproperty)(struct kgsl_device *device,
+		unsigned int type, void __user *value,
+		size_t sizebytes);
+	int (*getproperty_compat)(struct kgsl_device *device,
+		unsigned int type, void __user *value,
+		size_t sizebytes);
+	int (*waittimestamp)(struct kgsl_device *device,
+		struct kgsl_context *context, unsigned int timestamp,
+		unsigned int msecs);
+	int (*readtimestamp)(struct kgsl_device *device, void *priv,
+		enum kgsl_timestamp_type type, unsigned int *timestamp);
+	int (*queue_cmds)(struct kgsl_device_private *dev_priv,
+		struct kgsl_context *context, struct kgsl_drawobj *drawobj[],
+		uint32_t count, uint32_t *timestamp);
+	void (*power_stats)(struct kgsl_device *device,
+		struct kgsl_power_stats *stats);
+	unsigned int (*gpuid)(struct kgsl_device *device, unsigned int *chipid);
+	void (*snapshot)(struct kgsl_device *device,
+		struct kgsl_snapshot *snapshot, struct kgsl_context *context);
+	irqreturn_t (*irq_handler)(struct kgsl_device *device);
+	int (*drain)(struct kgsl_device *device);
+	/*
+	 * Optional functions - these functions are not mandatory.  The
+	 * driver will check that the function pointer is not NULL before
+	 * calling the hook
+	 */
+	struct kgsl_context *(*drawctxt_create)(struct kgsl_device_private *,
+						uint32_t *flags);
+	void (*drawctxt_detach)(struct kgsl_context *context);
+	void (*drawctxt_destroy)(struct kgsl_context *context);
+	void (*drawctxt_dump)(struct kgsl_device *device,
+		struct kgsl_context *context);
+	long (*ioctl)(struct kgsl_device_private *dev_priv,
+		unsigned int cmd, unsigned long arg);
+	long (*compat_ioctl)(struct kgsl_device_private *dev_priv,
+		unsigned int cmd, unsigned long arg);
+	int (*setproperty)(struct kgsl_device_private *dev_priv,
+		unsigned int type, void __user *value,
+		unsigned int sizebytes);
+	int (*setproperty_compat)(struct kgsl_device_private *dev_priv,
+		unsigned int type, void __user *value,
+		unsigned int sizebytes);
+	void (*drawctxt_sched)(struct kgsl_device *device,
+		struct kgsl_context *context);
+	void (*resume)(struct kgsl_device *device);
+	int (*regulator_enable)(struct kgsl_device *);
+	bool (*is_hw_collapsible)(struct kgsl_device *);
+	void (*regulator_disable)(struct kgsl_device *);
+	void (*pwrlevel_change_settings)(struct kgsl_device *device,
+		unsigned int prelevel, unsigned int postlevel, bool post);
+	void (*regulator_disable_poll)(struct kgsl_device *device);
+	void (*clk_set_options)(struct kgsl_device *device,
+		const char *name, struct clk *clk);
+	void (*gpu_model)(struct kgsl_device *device, char *str,
+		size_t bufsz);
+};
+
+struct kgsl_ioctl {
+	unsigned int cmd;
+	long (*func)(struct kgsl_device_private *, unsigned int, void *);
+};
+
+long kgsl_ioctl_helper(struct file *filep, unsigned int cmd, unsigned long arg,
+		const struct kgsl_ioctl *cmds, int len);
+
+/* Flag to mark the memobj_node as a preamble */
+#define MEMOBJ_PREAMBLE BIT(0)
+/* Flag to mark that the memobj_node should not go to the hardware */
+#define MEMOBJ_SKIP BIT(1)
+
+/**
+ * struct kgsl_memobj_node - Memory object descriptor
+ * @node: Local list node for the object
+ * @id: GPU memory ID for the object
+ * @offset: Offset within the object
+ * @gpuaddr: GPU address for the object
+ * @size: Size of the object
+ * @flags: External flags passed by the user
+ * @priv: Internal flags set by the driver
+ */
+struct kgsl_memobj_node {
+	struct list_head node;
+	unsigned int id;
+	uint64_t offset;
+	uint64_t gpuaddr;
+	uint64_t size;
+	unsigned long flags;
+	unsigned long priv;
+};
+
+struct kgsl_device {
+	struct device *dev;
+	const char *name;
+	unsigned int ver_major;
+	unsigned int ver_minor;
+	uint32_t flags;
+	enum kgsl_deviceid id;
+
+	/* Starting physical address for GPU registers */
+	unsigned long reg_phys;
+
+	/* Starting Kernel virtual address for GPU registers */
+	void __iomem *reg_virt;
+
+	/* Total memory size for all GPU registers */
+	unsigned int reg_len;
+
+	/* Kernel virtual address for GPU shader memory */
+	void __iomem *shader_mem_virt;
+
+	/* Starting physical address for GPU shader memory */
+	unsigned long shader_mem_phys;
+
+	/* GPU shader memory size */
+	unsigned int shader_mem_len;
+	struct kgsl_memdesc memstore;
+	struct kgsl_memdesc scratch;
+	const char *iomemname;
+	const char *shadermemname;
+
+	struct kgsl_mmu mmu;
+	struct completion hwaccess_gate;
+	struct completion halt_gate;
+	const struct kgsl_functable *ftbl;
+	struct work_struct idle_check_ws;
+	struct timer_list idle_timer;
+	struct kgsl_pwrctrl pwrctrl;
+	int open_count;
+
+	struct mutex mutex;
+	uint32_t state;
+	uint32_t requested_state;
+
+	atomic_t active_cnt;
+
+	wait_queue_head_t wait_queue;
+	wait_queue_head_t active_cnt_wq;
+	struct platform_device *pdev;
+	struct dentry *d_debugfs;
+	struct idr context_idr;
+	rwlock_t context_lock;
+
+	struct {
+		void *ptr;
+		size_t size;
+	} snapshot_memory;
+
+	struct kgsl_snapshot *snapshot;
+
+	u32 snapshot_faultcount;	/* Total number of faults since boot */
+	bool force_panic;		/* Force panic after snapshot dump */
+
+	/* Use CP Crash dumper to get GPU snapshot */
+	bool snapshot_crashdumper;
+
+	struct kobject snapshot_kobj;
+
+	struct kobject ppd_kobj;
+
+	/* Logging levels */
+	int cmd_log;
+	int ctxt_log;
+	int drv_log;
+	int mem_log;
+	int pwr_log;
+	struct kgsl_pwrscale pwrscale;
+
+	int reset_counter; /* Track how many GPU core resets have occurred */
+	struct workqueue_struct *events_wq;
+
+	struct device *busmondev; /* pseudo dev for GPU BW voting governor */
+
+	/* Number of active contexts seen globally for this device */
+	int active_context_count;
+	struct kobject *gpu_sysfs_kobj;
+};
+
+#define KGSL_MMU_DEVICE(_mmu) \
+	container_of((_mmu), struct kgsl_device, mmu)
+
+#define KGSL_DEVICE_COMMON_INIT(_dev) \
+	.hwaccess_gate = COMPLETION_INITIALIZER((_dev).hwaccess_gate),\
+	.halt_gate = COMPLETION_INITIALIZER((_dev).halt_gate),\
+	.idle_check_ws = __WORK_INITIALIZER((_dev).idle_check_ws,\
+			kgsl_idle_check),\
+	.context_idr = IDR_INIT((_dev).context_idr),\
+	.wait_queue = __WAIT_QUEUE_HEAD_INITIALIZER((_dev).wait_queue),\
+	.active_cnt_wq = __WAIT_QUEUE_HEAD_INITIALIZER((_dev).active_cnt_wq),\
+	.mutex = __MUTEX_INITIALIZER((_dev).mutex),\
+	.state = KGSL_STATE_NONE,\
+	.ver_major = DRIVER_VERSION_MAJOR,\
+	.ver_minor = DRIVER_VERSION_MINOR
+
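+/*
+ * Usage sketch (illustrative only; the concrete device definitions live in
+ * the adreno code, not in this header): a device embeds struct kgsl_device
+ * and expands this macro inside its static initializer to fill in the
+ * common fields. Names below are examples:
+ *
+ *	static struct adreno_device device_3d0 = {
+ *		.dev = {
+ *			KGSL_DEVICE_COMMON_INIT(device_3d0.dev),
+ *			.name = "kgsl-3d0",
+ *			.id = KGSL_DEVICE_3D0,
+ *		},
+ *	};
+ */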
+
+/**
+ * enum kgsl_context_priv - in-kernel bits for struct kgsl_context.priv
+ * @KGSL_CONTEXT_PRIV_DETACHED: The context has been destroyed by userspace
+ *	and is no longer using the GPU.
+ * @KGSL_CONTEXT_PRIV_INVALID: The context has been destroyed by the kernel
+ *	because it caused a GPU fault.
+ * @KGSL_CONTEXT_PRIV_PAGEFAULT: The context has caused a page fault.
+ * @KGSL_CONTEXT_PRIV_DEVICE_SPECIFIC: This value and higher values are
+ *	reserved for device-specific use.
+ */
+enum kgsl_context_priv {
+	KGSL_CONTEXT_PRIV_DETACHED = 0,
+	KGSL_CONTEXT_PRIV_INVALID,
+	KGSL_CONTEXT_PRIV_PAGEFAULT,
+	KGSL_CONTEXT_PRIV_DEVICE_SPECIFIC = 16,
+};
+
+struct kgsl_process_private;
+
+/**
+ * struct kgsl_context - The context fields that are valid for a user defined
+ * context
+ * @refcount: kref object for reference counting the context
+ * @id: integer identifier for the context
+ * @priority: The context's priority to submit commands to the GPU
+ * @tid: task that created this context.
+ * @dev_priv: pointer to the owning device instance
+ * @proc_priv: pointer to process private, the process that allocated the
+ * context
+ * @priv: in-kernel context flags, use KGSL_CONTEXT_* values
+ * @reset_status: status indicating whether a GPU reset occurred and whether
+ * this context was responsible for causing it
+ * @timeline: sync timeline used to create fences that can be signaled when a
+ * sync_pt timestamp expires
+ * @events: A kgsl_event_group for this context - contains the list of GPU
+ * events
+ * @flags: flags from userspace controlling the behavior of this context
+ * @pwr_constraint: power constraint from userspace for this context
+ * @fault_count: number of times the GPU hung in the last _context_throttle_time ms
+ * @fault_time: time of the first GPU hang in the last _context_throttle_time ms
+ */
+struct kgsl_context {
+	struct kref refcount;
+	uint32_t id;
+	uint32_t priority;
+	pid_t tid;
+	struct kgsl_device_private *dev_priv;
+	struct kgsl_process_private *proc_priv;
+	unsigned long priv;
+	struct kgsl_device *device;
+	unsigned int reset_status;
+	struct sync_timeline *timeline;
+	struct kgsl_event_group events;
+	unsigned int flags;
+	struct kgsl_pwr_constraint pwr_constraint;
+	unsigned int fault_count;
+	unsigned long fault_time;
+};
+
+#define _context_comm(_c) \
+	(((_c) && (_c)->proc_priv) ? (_c)->proc_priv->comm : "unknown")
+
+/*
+ * Print log messages with the context process name/pid:
+ * [...] kgsl kgsl-3d0: kgsl-api-test[22182]:
+ */
+
+#define pr_context(_d, _c, fmt, args...) \
+		dev_err((_d)->dev, "%s[%d]: " fmt, \
+		_context_comm((_c)), \
+		(_c)->proc_priv->pid, ##args)
+
+/**
+ * struct kgsl_process_private -  Private structure for a KGSL process (across
+ * all devices)
+ * @priv: Internal flags, use KGSL_PROCESS_* values
+ * @pid: ID for the task owner of the process
+ * @comm: task name of the process
+ * @mem_lock: Spinlock to protect the process memory lists
+ * @refcount: kref object for reference counting the process
+ * @mem_idr: Iterator for assigning IDs to memory allocations
+ * @pagetable: Pointer to the pagetable owned by this process
+ * @kobj: Pointer to a kobj for the sysfs directory for this process
+ * @debug_root: Pointer to the debugfs root for this process
+ * @stats: Memory allocation statistics for this process
+ * @syncsource_idr: sync sources created by this process
+ * @syncsource_lock: Spinlock to protect the syncsource idr
+ * @fd_count: Counter for the number of FDs for this process
+ */
+struct kgsl_process_private {
+	unsigned long priv;
+	pid_t pid;
+	char comm[TASK_COMM_LEN];
+	spinlock_t mem_lock;
+	struct kref refcount;
+	struct idr mem_idr;
+	struct kgsl_pagetable *pagetable;
+	struct list_head list;
+	struct kobject kobj;
+	struct dentry *debug_root;
+	struct {
+		uint64_t cur;
+		uint64_t max;
+	} stats[KGSL_MEM_ENTRY_MAX];
+	struct idr syncsource_idr;
+	spinlock_t syncsource_lock;
+	int fd_count;
+};
+
+/**
+ * enum kgsl_process_priv_flags - Private flags for kgsl_process_private
+ * @KGSL_PROCESS_INIT: Set if the process structure has been set up
+ */
+enum kgsl_process_priv_flags {
+	KGSL_PROCESS_INIT = 0,
+};
+
+struct kgsl_device_private {
+	struct kgsl_device *device;
+	struct kgsl_process_private *process_priv;
+};
+
+/**
+ * struct kgsl_snapshot - details for a specific snapshot instance
+ * @ib1base: Active IB1 base address at the time of fault
+ * @ib2base: Active IB2 base address at the time of fault
+ * @ib1size: Number of DWORDS pending in IB1 at the time of fault
+ * @ib2size: Number of DWORDS pending in IB2 at the time of fault
+ * @ib1dumped: Active IB1 dump status to snapshot binary
+ * @ib2dumped: Active IB2 dump status to snapshot binary
+ * @start: Pointer to the start of the static snapshot region
+ * @size: Size of the current snapshot instance
+ * @ptr: Pointer to the next block of memory to write to during snapshotting
+ * @remain: Bytes left in the snapshot region
+ * @timestamp: Timestamp of the snapshot instance (in seconds since boot)
+ * @mempool: Pointer to the memory pool for storing memory objects
+ * @mempool_size: Size of the memory pool
+ * @obj_list: List of frozen GPU buffers that are waiting to be dumped.
+ * @cp_list: List of IB's to be dumped.
+ * @work: worker to dump the frozen memory
+ * @dump_gate: completion gate signaled by worker when it is finished.
+ * @process: the process that caused the hang, if known.
+ * @sysfs_read: An atomic for concurrent snapshot reads via sysfs.
+ */
+struct kgsl_snapshot {
+	uint64_t ib1base;
+	uint64_t ib2base;
+	unsigned int ib1size;
+	unsigned int ib2size;
+	bool ib1dumped;
+	bool ib2dumped;
+	u8 *start;
+	size_t size;
+	u8 *ptr;
+	size_t remain;
+	unsigned long timestamp;
+	u8 *mempool;
+	size_t mempool_size;
+	struct list_head obj_list;
+	struct list_head cp_list;
+	struct work_struct work;
+	struct completion dump_gate;
+	struct kgsl_process_private *process;
+	atomic_t sysfs_read;
+};
+
+/**
+ * struct kgsl_snapshot_object  - GPU memory in the snapshot
+ * @gpuaddr: The GPU address identified during snapshot
+ * @size: The buffer size identified during snapshot
+ * @offset: offset from start of the allocated kgsl_mem_entry
+ * @type: SNAPSHOT_OBJ_TYPE_* identifier.
+ * @entry: the reference counted memory entry for this buffer
+ * @node: node for kgsl_snapshot.obj_list
+ */
+struct kgsl_snapshot_object {
+	uint64_t gpuaddr;
+	uint64_t size;
+	uint64_t offset;
+	int type;
+	struct kgsl_mem_entry *entry;
+	struct list_head node;
+};
+
+struct kgsl_device *kgsl_get_device(int dev_idx);
+
+static inline void kgsl_process_add_stats(struct kgsl_process_private *priv,
+	unsigned int type, uint64_t size)
+{
+	priv->stats[type].cur += size;
+	if (priv->stats[type].max < priv->stats[type].cur)
+		priv->stats[type].max = priv->stats[type].cur;
+}
+
+static inline void kgsl_regread(struct kgsl_device *device,
+				unsigned int offsetwords,
+				unsigned int *value)
+{
+	device->ftbl->regread(device, offsetwords, value);
+}
+
+static inline void kgsl_regwrite(struct kgsl_device *device,
+				 unsigned int offsetwords,
+				 unsigned int value)
+{
+	device->ftbl->regwrite(device, offsetwords, value);
+}
+
+static inline void kgsl_regrmw(struct kgsl_device *device,
+		unsigned int offsetwords,
+		unsigned int mask, unsigned int bits)
+{
+	unsigned int val = 0;
+
+	device->ftbl->regread(device, offsetwords, &val);
+	val &= ~mask;
+	device->ftbl->regwrite(device, offsetwords, val | bits);
+}
+
+static inline int kgsl_idle(struct kgsl_device *device)
+{
+	return device->ftbl->idle(device);
+}
+
+static inline unsigned int kgsl_gpuid(struct kgsl_device *device,
+	unsigned int *chipid)
+{
+	return device->ftbl->gpuid(device, chipid);
+}
+
+static inline int kgsl_create_device_sysfs_files(struct device *root,
+	const struct device_attribute **list)
+{
+	int ret = 0, i;
+
+	for (i = 0; list[i] != NULL; i++)
+		ret |= device_create_file(root, list[i]);
+	return ret;
+}
+
+static inline void kgsl_remove_device_sysfs_files(struct device *root,
+	const struct device_attribute **list)
+{
+	int i;
+
+	for (i = 0; list[i] != NULL; i++)
+		device_remove_file(root, list[i]);
+}
+
+static inline struct kgsl_device *kgsl_device_from_dev(struct device *dev)
+{
+	int i;
+
+	for (i = 0; i < KGSL_DEVICE_MAX; i++) {
+		if (kgsl_driver.devp[i] && kgsl_driver.devp[i]->dev == dev)
+			return kgsl_driver.devp[i];
+	}
+
+	return NULL;
+}
+
+static inline int kgsl_state_is_awake(struct kgsl_device *device)
+{
+	if (device->state == KGSL_STATE_ACTIVE ||
+		device->state == KGSL_STATE_AWARE)
+		return true;
+	else
+		return false;
+}
+
+int kgsl_readtimestamp(struct kgsl_device *device, void *priv,
+		enum kgsl_timestamp_type type, unsigned int *timestamp);
+
+int kgsl_check_timestamp(struct kgsl_device *device,
+		struct kgsl_context *context, unsigned int timestamp);
+
+int kgsl_device_platform_probe(struct kgsl_device *device);
+
+void kgsl_device_platform_remove(struct kgsl_device *device);
+
+const char *kgsl_pwrstate_to_str(unsigned int state);
+
+int kgsl_device_snapshot_init(struct kgsl_device *device);
+void kgsl_device_snapshot(struct kgsl_device *device,
+			struct kgsl_context *context);
+void kgsl_device_snapshot_close(struct kgsl_device *device);
+void kgsl_snapshot_save_frozen_objs(struct work_struct *work);
+
+void kgsl_events_init(void);
+void kgsl_events_exit(void);
+
+void kgsl_del_event_group(struct kgsl_event_group *group);
+
+void kgsl_add_event_group(struct kgsl_event_group *group,
+		struct kgsl_context *context, const char *name,
+		readtimestamp_func readtimestamp, void *priv);
+
+void kgsl_cancel_events_timestamp(struct kgsl_device *device,
+		struct kgsl_event_group *group, unsigned int timestamp);
+void kgsl_cancel_events(struct kgsl_device *device,
+		struct kgsl_event_group *group);
+void kgsl_cancel_event(struct kgsl_device *device,
+		struct kgsl_event_group *group, unsigned int timestamp,
+		kgsl_event_func func, void *priv);
+bool kgsl_event_pending(struct kgsl_device *device,
+		struct kgsl_event_group *group, unsigned int timestamp,
+		kgsl_event_func func, void *priv);
+int kgsl_add_event(struct kgsl_device *device, struct kgsl_event_group *group,
+		unsigned int timestamp, kgsl_event_func func, void *priv);
+void kgsl_process_event_group(struct kgsl_device *device,
+	struct kgsl_event_group *group);
+void kgsl_flush_event_group(struct kgsl_device *device,
+		struct kgsl_event_group *group);
+void kgsl_process_event_groups(struct kgsl_device *device);
+
+void kgsl_context_destroy(struct kref *kref);
+
+int kgsl_context_init(struct kgsl_device_private *dev_priv,
+		struct kgsl_context *context);
+
+void kgsl_context_dump(struct kgsl_context *context);
+
+int kgsl_memfree_find_entry(pid_t ptname, uint64_t *gpuaddr,
+	uint64_t *size, uint64_t *flags, pid_t *pid);
+
+long kgsl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg);
+
+long kgsl_ioctl_copy_in(unsigned int kernel_cmd, unsigned int user_cmd,
+		unsigned long arg, unsigned char *ptr);
+
+long kgsl_ioctl_copy_out(unsigned int kernel_cmd, unsigned int user_cmd,
+		unsigned long arg, unsigned char *ptr);
+
+/**
+ * kgsl_context_put() - Release context reference count
+ * @context: Pointer to the KGSL context to be released
+ *
+ * Reduce the reference count on a KGSL context and destroy it if it is no
+ * longer needed
+ */
+static inline void
+kgsl_context_put(struct kgsl_context *context)
+{
+	if (context)
+		kref_put(&context->refcount, kgsl_context_destroy);
+}
+
+/**
+ * kgsl_context_detached() - check if a context is detached
+ * @context: the context
+ *
+ * Check if a context has been destroyed by userspace and is only waiting
+ * for reference counts to go away. This check is used to weed out
+ * contexts that shouldn't use the GPU, so NULL is considered detached.
+ */
+static inline bool kgsl_context_detached(struct kgsl_context *context)
+{
+	return (context == NULL || test_bit(KGSL_CONTEXT_PRIV_DETACHED,
+						&context->priv));
+}
+
+/**
+ * kgsl_context_invalid() - check if a context is invalid
+ * @context: the context
+ *
+ * Check if a context has been invalidated by the kernel and may no
+ * longer use the GPU.
+ */
+static inline bool kgsl_context_invalid(struct kgsl_context *context)
+{
+	return (context == NULL || test_bit(KGSL_CONTEXT_PRIV_INVALID,
+						&context->priv));
+}
+
+
+/**
+ * kgsl_context_get() - get a pointer to a KGSL context
+ * @device: Pointer to the KGSL device that owns the context
+ * @id: Context ID
+ *
+ * Find the context associated with the given ID number, increase the reference
+ * count on it and return it.  The caller must make sure that this call is
+ * paired with a kgsl_context_put.  This function is for internal use because it
+ * doesn't validate the ownership of the context with the calling process - use
+ * kgsl_context_get_owner for that
+ */
+static inline struct kgsl_context *kgsl_context_get(struct kgsl_device *device,
+		uint32_t id)
+{
+	int result = 0;
+	struct kgsl_context *context = NULL;
+
+	read_lock(&device->context_lock);
+
+	context = idr_find(&device->context_idr, id);
+
+	/* Don't return a context that has been detached */
+	if (kgsl_context_detached(context))
+		context = NULL;
+	else
+		result = kref_get_unless_zero(&context->refcount);
+
+	read_unlock(&device->context_lock);
+
+	if (!result)
+		return NULL;
+	return context;
+}
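+
+/*
+ * Illustrative usage sketch (not part of this patch): every successful
+ * lookup must eventually be balanced with kgsl_context_put() once the
+ * caller is done with the context:
+ *
+ *	struct kgsl_context *context = kgsl_context_get(device, id);
+ *
+ *	if (context != NULL) {
+ *		... operate on the context ...
+ *		kgsl_context_put(context);
+ *	}
+ */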
+
+/**
+ * _kgsl_context_get() - lightweight function to just increment the ref count
+ * @context: Pointer to the KGSL context
+ *
+ * Get a reference to the specified KGSL context structure. This is a
+ * lightweight way to just increase the refcount on a known context rather than
+ * walking through kgsl_context_get and searching the iterator
+ */
+static inline int _kgsl_context_get(struct kgsl_context *context)
+{
+	int ret = 0;
+
+	if (context)
+		ret = kref_get_unless_zero(&context->refcount);
+
+	return ret;
+}
+
+/**
+ * kgsl_context_get_owner() - get a pointer to a KGSL context in a specific
+ * process
+ * @dev_priv: Pointer to the process struct
+ * @id: Context ID to return
+ *
+ * Find the context associated with the given ID number, increase the reference
+ * count on it and return it.  The caller must make sure that this call is
+ * paired with a kgsl_context_put. This function validates that the context id
+ * given is owned by the dev_priv instance that is passed in.  See
+ * kgsl_context_get for the internal version that doesn't do the check
+ */
+static inline struct kgsl_context *kgsl_context_get_owner(
+		struct kgsl_device_private *dev_priv, uint32_t id)
+{
+	struct kgsl_context *context;
+
+	context = kgsl_context_get(dev_priv->device, id);
+
+	/* Verify that the context belongs to current calling fd. */
+	if (context != NULL && context->dev_priv != dev_priv) {
+		kgsl_context_put(context);
+		return NULL;
+	}
+
+	return context;
+}
+
+/**
+ * kgsl_process_private_get() - increment the refcount on a
+ * kgsl_process_private struct
+ * @process: Pointer to the KGSL process_private
+ *
+ * Returns 0 if the structure is invalid and a reference count could not be
+ * obtained, nonzero otherwise.
+ */
+static inline int kgsl_process_private_get(struct kgsl_process_private *process)
+{
+	int ret = 0;
+
+	if (process != NULL)
+		ret = kref_get_unless_zero(&process->refcount);
+	return ret;
+}
+
+void kgsl_process_private_put(struct kgsl_process_private *private);
+
+
+struct kgsl_process_private *kgsl_process_private_find(pid_t pid);
+
+/**
+ * kgsl_property_read_u32() - Read a u32 property from the device tree
+ * @device: Pointer to the KGSL device
+ * @prop: String name of the property to query
+ * @ptr: Pointer to the variable to store the property
+ */
+static inline int kgsl_property_read_u32(struct kgsl_device *device,
+	const char *prop, unsigned int *ptr)
+{
+	return of_property_read_u32(device->pdev->dev.of_node, prop, ptr);
+}
+
+/**
+ * kgsl_sysfs_store() - parse a string from a sysfs store function
+ * @buf: Incoming string to parse
+ * @ptr: Pointer to an unsigned int to store the value
+ */
+static inline int kgsl_sysfs_store(const char *buf, unsigned int *ptr)
+{
+	unsigned int val;
+	int rc;
+
+	rc = kstrtou32(buf, 0, &val);
+	if (rc)
+		return rc;
+
+	if (ptr)
+		*ptr = val;
+
+	return 0;
+}
+
+/*
+ * A helper macro to print standardized "not enough snapshot memory"
+ * messages - this makes it easy to keep the wording consistent as well
+ * as cut down on the number of strings in the binary
+ */
+#define SNAPSHOT_ERR_NOMEM(_d, _s) \
+	KGSL_DRV_ERR((_d), \
+	"snapshot: not enough snapshot memory for section %s\n", (_s))
+
+/**
+ * struct kgsl_snapshot_registers - list of registers to snapshot
+ * @regs: Pointer to an array of register ranges
+ * @count: Number of entries in the array
+ */
+struct kgsl_snapshot_registers {
+	const unsigned int *regs;
+	unsigned int count;
+};
+
+size_t kgsl_snapshot_dump_registers(struct kgsl_device *device, u8 *buf,
+		size_t remain, void *priv);
+
+void kgsl_snapshot_indexed_registers(struct kgsl_device *device,
+	struct kgsl_snapshot *snapshot, unsigned int index,
+	unsigned int data, unsigned int start, unsigned int count);
+
+int kgsl_snapshot_get_object(struct kgsl_snapshot *snapshot,
+	struct kgsl_process_private *process, uint64_t gpuaddr,
+	uint64_t size, unsigned int type);
+
+int kgsl_snapshot_have_object(struct kgsl_snapshot *snapshot,
+	struct kgsl_process_private *process,
+	uint64_t gpuaddr, uint64_t size);
+
+struct adreno_ib_object_list;
+
+int kgsl_snapshot_add_ib_obj_list(struct kgsl_snapshot *snapshot,
+	struct adreno_ib_object_list *ib_obj_list);
+
+void kgsl_snapshot_add_section(struct kgsl_device *device, u16 id,
+	struct kgsl_snapshot *snapshot,
+	size_t (*func)(struct kgsl_device *, u8 *, size_t, void *),
+	void *priv);
+
+/**
+ * struct kgsl_pwr_limit - limit structure for each client
+ * @node: Local list node for the limits list
+ * @level: requested power level
+ * @device: pointer to the device structure
+ */
+struct kgsl_pwr_limit {
+	struct list_head node;
+	unsigned int level;
+	struct kgsl_device *device;
+};
+
+#endif  /* __KGSL_DEVICE_H */
diff --git a/drivers/gpu/msm/kgsl_drawobj.c b/drivers/gpu/msm/kgsl_drawobj.c
new file mode 100644
index 0000000..01c3a06
--- /dev/null
+++ b/drivers/gpu/msm/kgsl_drawobj.c
@@ -0,0 +1,1036 @@
+/* Copyright (c) 2016-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+/*
+ * KGSL drawobj management
+ * A drawobj is a single submission from userland.  The drawobj
+ * encapsulates everything about the submission: command buffers, flags and
+ * sync points.
+ *
+ * Sync points are events that need to expire before the
+ * drawobj can be queued to the hardware. All sync points are contained in an
+ * array of kgsl_drawobj_sync_event structs in the drawobj. There can be
+ * multiple types of events both internal ones (GPU events) and external
+ * triggers. As the events expire bits are cleared in a pending bitmap stored
+ * in the drawobj. The GPU will submit the command as soon as the bitmap
+ * goes to zero indicating no more pending events.
+ */
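+
+/*
+ * Sketch of the pending-bitmap flow described above (illustrative only,
+ * condensed from the functions in this file): a sync point marks its slot
+ * when it is armed and clears it when it expires; the drawobj becomes
+ * eligible for dispatch once no bits remain set.
+ *
+ *	set_bit(event->id, &syncobj->pending);
+ *	...
+ *	if (test_and_clear_bit(event->id, &syncobj->pending) &&
+ *			!kgsl_drawobj_events_pending(syncobj))
+ *		... schedule the drawobj for dispatch ...
+ */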
+
+#include <linux/uaccess.h>
+#include <linux/list.h>
+#include <linux/compat.h>
+
+#include "kgsl.h"
+#include "kgsl_device.h"
+#include "kgsl_drawobj.h"
+#include "kgsl_sync.h"
+#include "kgsl_trace.h"
+#include "kgsl_compat.h"
+
+/*
+ * Define a kmem cache for the memobj structures since we allocate and free
+ * them so frequently
+ */
+static struct kmem_cache *memobjs_cache;
+
+static void drawobj_destroy_object(struct kref *kref)
+{
+	struct kgsl_drawobj *drawobj = container_of(kref,
+		struct kgsl_drawobj, refcount);
+	struct kgsl_drawobj_sync *syncobj;
+
+	kgsl_context_put(drawobj->context);
+
+	switch (drawobj->type) {
+	case SYNCOBJ_TYPE:
+		syncobj = SYNCOBJ(drawobj);
+		kfree(syncobj->synclist);
+		kfree(syncobj);
+		break;
+	case CMDOBJ_TYPE:
+	case MARKEROBJ_TYPE:
+		kfree(CMDOBJ(drawobj));
+		break;
+	}
+}
+
+static inline void drawobj_put(struct kgsl_drawobj *drawobj)
+{
+	if (drawobj)
+		kref_put(&drawobj->refcount, drawobj_destroy_object);
+}
+
+void kgsl_dump_syncpoints(struct kgsl_device *device,
+	struct kgsl_drawobj_sync *syncobj)
+{
+	struct kgsl_drawobj_sync_event *event;
+	unsigned int i;
+
+	for (i = 0; i < syncobj->numsyncs; i++) {
+		event = &syncobj->synclist[i];
+
+		if (!kgsl_drawobj_event_pending(syncobj, i))
+			continue;
+
+		switch (event->type) {
+		case KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP: {
+			unsigned int retired;
+
+			 kgsl_readtimestamp(event->device,
+				event->context, KGSL_TIMESTAMP_RETIRED,
+				&retired);
+
+			dev_err(device->dev,
+				"  [timestamp] context %d timestamp %d (retired %d)\n",
+				event->context->id, event->timestamp,
+				retired);
+			break;
+		}
+		case KGSL_CMD_SYNCPOINT_TYPE_FENCE:
+			if (event->handle)
+				dev_err(device->dev, "  fence: [%pK] %s\n",
+					event->handle->fence,
+					event->handle->name);
+			else
+				dev_err(device->dev, "  fence: invalid\n");
+			break;
+		}
+	}
+}
+
+static void syncobj_timer(unsigned long data)
+{
+	struct kgsl_device *device;
+	struct kgsl_drawobj_sync *syncobj = (struct kgsl_drawobj_sync *) data;
+	struct kgsl_drawobj *drawobj = DRAWOBJ(syncobj);
+	struct kgsl_drawobj_sync_event *event;
+	unsigned int i;
+
+	if (syncobj == NULL || drawobj->context == NULL)
+		return;
+
+	device = drawobj->context->device;
+
+	dev_err(device->dev,
+		"kgsl: possible gpu syncpoint deadlock for context %d timestamp %d\n",
+		drawobj->context->id, drawobj->timestamp);
+
+	set_bit(ADRENO_CONTEXT_FENCE_LOG, &drawobj->context->priv);
+	kgsl_context_dump(drawobj->context);
+	clear_bit(ADRENO_CONTEXT_FENCE_LOG, &drawobj->context->priv);
+
+	dev_err(device->dev, "      pending events:\n");
+
+	for (i = 0; i < syncobj->numsyncs; i++) {
+		event = &syncobj->synclist[i];
+
+		if (!kgsl_drawobj_event_pending(syncobj, i))
+			continue;
+
+		switch (event->type) {
+		case KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP:
+			dev_err(device->dev, "       [%d] TIMESTAMP %d:%d\n",
+				i, event->context->id, event->timestamp);
+			break;
+		case KGSL_CMD_SYNCPOINT_TYPE_FENCE:
+			if (event->handle != NULL) {
+				dev_err(device->dev, "       [%d] FENCE %s\n",
+				i, event->handle->fence ?
+					event->handle->fence->name : "NULL");
+				kgsl_sync_fence_log(event->handle->fence);
+			}
+			break;
+		}
+	}
+
+	dev_err(device->dev, "--gpu syncpoint deadlock print end--\n");
+}
+
+/*
+ * a generic function to retire a pending sync event and (possibly)
+ * kick the dispatcher
+ */
+static void drawobj_sync_expire(struct kgsl_device *device,
+	struct kgsl_drawobj_sync_event *event)
+{
+	struct kgsl_drawobj_sync *syncobj = event->syncobj;
+	/*
+	 * Clear the event from the pending mask - if it is already clear, then
+	 * leave without doing anything useful
+	 */
+	if (!test_and_clear_bit(event->id, &syncobj->pending))
+		return;
+
+	/*
+	 * If no more pending events, delete the timer and schedule the command
+	 * for dispatch
+	 */
+	if (!kgsl_drawobj_events_pending(event->syncobj)) {
+		del_timer_sync(&syncobj->timer);
+
+		if (device->ftbl->drawctxt_sched)
+			device->ftbl->drawctxt_sched(device,
+				event->syncobj->base.context);
+	}
+}
+
+/*
+ * This function is called by the GPU event when the sync event timestamp
+ * expires
+ */
+static void drawobj_sync_func(struct kgsl_device *device,
+		struct kgsl_event_group *group, void *priv, int result)
+{
+	struct kgsl_drawobj_sync_event *event = priv;
+
+	trace_syncpoint_timestamp_expire(event->syncobj,
+		event->context, event->timestamp);
+
+	drawobj_sync_expire(device, event);
+	kgsl_context_put(event->context);
+	drawobj_put(&event->syncobj->base);
+}
+
+static inline void memobj_list_free(struct list_head *list)
+{
+	struct kgsl_memobj_node *mem, *tmpmem;
+
+	/* Free the cmd mem here */
+	list_for_each_entry_safe(mem, tmpmem, list, node) {
+		list_del_init(&mem->node);
+		kmem_cache_free(memobjs_cache, mem);
+	}
+}
+
+static void drawobj_destroy_sync(struct kgsl_drawobj *drawobj)
+{
+	struct kgsl_drawobj_sync *syncobj = SYNCOBJ(drawobj);
+	unsigned long pending;
+	unsigned int i;
+
+	/* Zap the canary timer */
+	del_timer_sync(&syncobj->timer);
+
+	/*
+	 * Copy off the pending list and clear all pending events - this will
+	 * render any subsequent asynchronous callback harmless
+	 */
+	bitmap_copy(&pending, &syncobj->pending, KGSL_MAX_SYNCPOINTS);
+	bitmap_zero(&syncobj->pending, KGSL_MAX_SYNCPOINTS);
+
+	/*
+	 * Cancel each event that was still pending - this will render any
+	 * subsequent async callbacks harmless
+	 */
+	for (i = 0; i < syncobj->numsyncs; i++) {
+		struct kgsl_drawobj_sync_event *event = &syncobj->synclist[i];
+
+		/* Don't do anything if the event has already expired */
+		if (!test_bit(i, &pending))
+			continue;
+
+		switch (event->type) {
+		case KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP:
+			kgsl_cancel_event(drawobj->device,
+				&event->context->events, event->timestamp,
+				drawobj_sync_func, event);
+			break;
+		case KGSL_CMD_SYNCPOINT_TYPE_FENCE:
+			if (kgsl_sync_fence_async_cancel(event->handle))
+				drawobj_put(drawobj);
+			break;
+		}
+	}
+
+	/*
+	 * If we cancelled an event, there's a good chance that the context is
+	 * on a dispatcher queue, so schedule to get it removed.
+	 */
+	if (!bitmap_empty(&pending, KGSL_MAX_SYNCPOINTS) &&
+		drawobj->device->ftbl->drawctxt_sched)
+		drawobj->device->ftbl->drawctxt_sched(drawobj->device,
+							drawobj->context);
+
+}
+
+static void drawobj_destroy_cmd(struct kgsl_drawobj *drawobj)
+{
+	struct kgsl_drawobj_cmd *cmdobj = CMDOBJ(drawobj);
+
+	/*
+	 * Release the refcount on the mem entry associated with the
+	 * ib profiling buffer
+	 */
+	if (cmdobj->base.flags & KGSL_DRAWOBJ_PROFILING)
+		kgsl_mem_entry_put(cmdobj->profiling_buf_entry);
+
+	/* Destroy the cmdlist we created */
+	memobj_list_free(&cmdobj->cmdlist);
+
+	/* Destroy the memlist we created */
+	memobj_list_free(&cmdobj->memlist);
+}
+
+/**
+ * kgsl_drawobj_destroy() - Destroy a kgsl drawobj structure
+ * @drawobj: Pointer to the kgsl drawobj to destroy
+ *
+ * Start the process of destroying a drawobj.  Cancel any pending events
+ * and decrement the refcount.  Asynchronous events can still signal after
+ * kgsl_drawobj_destroy has returned.
+ */
+void kgsl_drawobj_destroy(struct kgsl_drawobj *drawobj)
+{
+	if (!drawobj)
+		return;
+
+	if (drawobj->type & SYNCOBJ_TYPE)
+		drawobj_destroy_sync(drawobj);
+	else if (drawobj->type & (CMDOBJ_TYPE | MARKEROBJ_TYPE))
+		drawobj_destroy_cmd(drawobj);
+	else
+		return;
+
+	drawobj_put(drawobj);
+}
+EXPORT_SYMBOL(kgsl_drawobj_destroy);
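Illustrative sketch (editorial addition, not part of this patch): the
create/populate/destroy lifecycle a submit path would follow, using the APIs
declared later in this change. The helper name and the ptr/size/count
arguments are hypothetical, and the snippet is not compilable outside the
driver tree.

static int example_build_cmdobj(struct kgsl_device *device,
		struct kgsl_context *context, void __user *ptr,
		unsigned int size, unsigned int count)
{
	struct kgsl_drawobj_cmd *cmdobj;
	int ret;

	/* Takes a context reference and returns ERR_PTR() on failure */
	cmdobj = kgsl_drawobj_cmd_create(device, context, 0, CMDOBJ_TYPE);
	if (IS_ERR(cmdobj))
		return PTR_ERR(cmdobj);

	ret = kgsl_drawobj_cmd_add_cmdlist(device, cmdobj, ptr, size, count);
	if (ret)
		/* Frees the cmd/mem lists and drops the creation refcount */
		kgsl_drawobj_destroy(DRAWOBJ(cmdobj));

	return ret;
}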
+
+static void drawobj_sync_fence_func(void *priv)
+{
+	struct kgsl_drawobj_sync_event *event = priv;
+
+	trace_syncpoint_fence_expire(event->syncobj,
+		event->handle ? event->handle->name : "unknown");
+
+	drawobj_sync_expire(event->device, event);
+
+	drawobj_put(&event->syncobj->base);
+}
+
+/* drawobj_add_sync_fence() - Add a new sync fence syncpoint
+ * @device: KGSL device
+ * @syncobj: KGSL sync obj to add the sync point to
+ * @priv: Private structure passed by the user
+ *
+ * Add a new fence syncpoint to the sync obj.
+ */
+static int drawobj_add_sync_fence(struct kgsl_device *device,
+		struct kgsl_drawobj_sync *syncobj, void *priv)
+{
+	struct kgsl_cmd_syncpoint_fence *sync = priv;
+	struct kgsl_drawobj *drawobj = DRAWOBJ(syncobj);
+	struct kgsl_drawobj_sync_event *event;
+	unsigned int id;
+
+	kref_get(&drawobj->refcount);
+
+	id = syncobj->numsyncs++;
+
+	event = &syncobj->synclist[id];
+
+	event->id = id;
+	event->type = KGSL_CMD_SYNCPOINT_TYPE_FENCE;
+	event->syncobj = syncobj;
+	event->device = device;
+	event->context = NULL;
+
+	set_bit(event->id, &syncobj->pending);
+
+	event->handle = kgsl_sync_fence_async_wait(sync->fd,
+		drawobj_sync_fence_func, event);
+
+	if (IS_ERR_OR_NULL(event->handle)) {
+		int ret = PTR_ERR(event->handle);
+
+		clear_bit(event->id, &syncobj->pending);
+		event->handle = NULL;
+
+		drawobj_put(drawobj);
+
+		/*
+		 * If ret == 0 the fence was already signaled - print a trace
+		 * message so we can track that
+		 */
+		if (ret == 0)
+			trace_syncpoint_fence_expire(syncobj, "signaled");
+
+		return ret;
+	}
+
+	trace_syncpoint_fence(syncobj, event->handle->name);
+
+	return 0;
+}
+
+/* drawobj_add_sync_timestamp() - Add a new sync point for a sync obj
+ * @device: KGSL device
+ * @syncobj: KGSL sync obj to add the sync point to
+ * @priv: Private structure passed by the user
+ *
+ * Add a new sync point timestamp event to the sync obj.
+ */
+static int drawobj_add_sync_timestamp(struct kgsl_device *device,
+		struct kgsl_drawobj_sync *syncobj, void *priv)
+{
+	struct kgsl_cmd_syncpoint_timestamp *sync = priv;
+	struct kgsl_drawobj *drawobj = DRAWOBJ(syncobj);
+	struct kgsl_context *context = kgsl_context_get(device,
+		sync->context_id);
+	struct kgsl_drawobj_sync_event *event;
+	int ret = -EINVAL;
+	unsigned int id;
+
+	if (context == NULL)
+		return -EINVAL;
+
+	/*
+	 * We allow somebody to create a sync point on their own context.
+	 * This has the effect of delaying a command from submitting until the
+	 * dependent command has cleared.  That said we obviously can't let them
+	 * create a sync point on a future timestamp.
+	 */
+
+	if (context == drawobj->context) {
+		unsigned int queued;
+
+		kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_QUEUED,
+			&queued);
+
+		if (timestamp_cmp(sync->timestamp, queued) > 0) {
+			KGSL_DRV_ERR(device,
+			"Cannot create syncpoint for future timestamp %d (current %d)\n",
+				sync->timestamp, queued);
+			goto done;
+		}
+	}
+
+	kref_get(&drawobj->refcount);
+
+	id = syncobj->numsyncs++;
+
+	event = &syncobj->synclist[id];
+	event->id = id;
+
+	event->type = KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP;
+	event->syncobj = syncobj;
+	event->context = context;
+	event->timestamp = sync->timestamp;
+	event->device = device;
+
+	set_bit(event->id, &syncobj->pending);
+
+	ret = kgsl_add_event(device, &context->events, sync->timestamp,
+		drawobj_sync_func, event);
+
+	if (ret) {
+		clear_bit(event->id, &syncobj->pending);
+		drawobj_put(drawobj);
+	} else {
+		trace_syncpoint_timestamp(syncobj, context, sync->timestamp);
+	}
+
+done:
+	if (ret)
+		kgsl_context_put(context);
+
+	return ret;
+}
+
+/**
+ * kgsl_drawobj_sync_add_sync() - Add a sync point to a sync obj
+ * @device: Pointer to the KGSL device struct for the GPU
+ * @syncobj: Pointer to the sync obj
+ * @sync: Pointer to the user-specified struct defining the syncpoint
+ *
+ * Create a new sync point in the sync obj based on the
+ * user specified parameters
+ */
+int kgsl_drawobj_sync_add_sync(struct kgsl_device *device,
+	struct kgsl_drawobj_sync *syncobj,
+	struct kgsl_cmd_syncpoint *sync)
+{
+	void *priv;
+	int ret, psize;
+	struct kgsl_drawobj *drawobj = DRAWOBJ(syncobj);
+	int (*func)(struct kgsl_device *device,
+			struct kgsl_drawobj_sync *syncobj,
+			void *priv);
+
+	switch (sync->type) {
+	case KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP:
+		psize = sizeof(struct kgsl_cmd_syncpoint_timestamp);
+		func = drawobj_add_sync_timestamp;
+		break;
+	case KGSL_CMD_SYNCPOINT_TYPE_FENCE:
+		psize = sizeof(struct kgsl_cmd_syncpoint_fence);
+		func = drawobj_add_sync_fence;
+		break;
+	default:
+		KGSL_DRV_ERR(device,
+			"bad syncpoint type ctxt %d type 0x%x size %zu\n",
+			drawobj->context->id, sync->type, sync->size);
+		return -EINVAL;
+	}
+
+	if (sync->size != psize) {
+		KGSL_DRV_ERR(device,
+			"bad syncpoint size ctxt %d type 0x%x size %zu\n",
+			drawobj->context->id, sync->type, sync->size);
+		return -EINVAL;
+	}
+
+	priv = kzalloc(sync->size, GFP_KERNEL);
+	if (priv == NULL)
+		return -ENOMEM;
+
+	if (copy_from_user(priv, sync->priv, sync->size)) {
+		kfree(priv);
+		return -EFAULT;
+	}
+
+	ret = func(device, syncobj, priv);
+	kfree(priv);
+
+	return ret;
+}
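Illustrative sketch (editorial addition, not part of this patch): the shape of
the data this function validates. A caller describes one syncpoint with a
kgsl_cmd_syncpoint whose priv points at the type-specific payload and whose
size must match that payload exactly. Field names follow the UAPI structs
referenced above; the helper below is hypothetical userspace code and assumes
the msm_kgsl.h UAPI header.

#include <linux/msm_kgsl.h>

static void fill_timestamp_syncpoint(struct kgsl_cmd_syncpoint *sync,
		struct kgsl_cmd_syncpoint_timestamp *ts,
		unsigned int context_id, unsigned int timestamp)
{
	/* Wait for 'timestamp' to retire on context 'context_id' */
	ts->context_id = context_id;
	ts->timestamp = timestamp;

	sync->type = KGSL_CMD_SYNCPOINT_TYPE_TIMESTAMP;
	sync->priv = ts;		/* copied in with copy_from_user() above */
	sync->size = sizeof(*ts);	/* must equal psize or the add fails with -EINVAL */
}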
+
+static void add_profiling_buffer(struct kgsl_device *device,
+		struct kgsl_drawobj_cmd *cmdobj,
+		uint64_t gpuaddr, uint64_t size,
+		unsigned int id, uint64_t offset)
+{
+	struct kgsl_mem_entry *entry;
+	struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+
+	if (!(drawobj->flags & KGSL_DRAWOBJ_PROFILING))
+		return;
+
+	/* Only the first buffer entry counts - ignore the rest */
+	if (cmdobj->profiling_buf_entry != NULL)
+		return;
+
+	if (id != 0)
+		entry = kgsl_sharedmem_find_id(drawobj->context->proc_priv,
+				id);
+	else
+		entry = kgsl_sharedmem_find(drawobj->context->proc_priv,
+			gpuaddr);
+
+	if (entry != NULL) {
+		if (!kgsl_gpuaddr_in_memdesc(&entry->memdesc, gpuaddr, size)) {
+			kgsl_mem_entry_put(entry);
+			entry = NULL;
+		}
+	}
+
+	if (entry == NULL) {
+		KGSL_DRV_ERR(device,
+			"ignore bad profile buffer ctxt %d id %d offset %lld gpuaddr %llx size %lld\n",
+			drawobj->context->id, id, offset, gpuaddr, size);
+		return;
+	}
+
+	cmdobj->profiling_buf_entry = entry;
+
+	if (id != 0)
+		cmdobj->profiling_buffer_gpuaddr =
+			entry->memdesc.gpuaddr + offset;
+	else
+		cmdobj->profiling_buffer_gpuaddr = gpuaddr;
+}
+
+/**
+ * kgsl_drawobj_cmd_add_ibdesc() - Add a legacy ibdesc to a command obj
+ * @device: Pointer to the KGSL device struct for the GPU
+ * @cmdobj: Pointer to the command obj
+ * @ibdesc: Pointer to the user-specified struct defining the memory or IB
+ *
+ * Create a new memory entry in the command obj based on the user-specified
+ * parameters
+ */
+int kgsl_drawobj_cmd_add_ibdesc(struct kgsl_device *device,
+	struct kgsl_drawobj_cmd *cmdobj, struct kgsl_ibdesc *ibdesc)
+{
+	uint64_t gpuaddr = (uint64_t) ibdesc->gpuaddr;
+	uint64_t size = (uint64_t) ibdesc->sizedwords << 2;
+	struct kgsl_memobj_node *mem;
+	struct kgsl_drawobj *drawobj = DRAWOBJ(cmdobj);
+
+	/* sanitize the ibdesc ctrl flags */
+	ibdesc->ctrl &= KGSL_IBDESC_MEMLIST | KGSL_IBDESC_PROFILING_BUFFER;
+
+	if (drawobj->flags & KGSL_DRAWOBJ_MEMLIST &&
+			ibdesc->ctrl & KGSL_IBDESC_MEMLIST) {
+		if (ibdesc->ctrl & KGSL_IBDESC_PROFILING_BUFFER) {
+			add_profiling_buffer(device, cmdobj,
+					gpuaddr, size, 0, 0);
+			return 0;
+		}
+	}
+
+	/* Ignore if SYNC or MARKER is specified */
+	if (drawobj->type & (SYNCOBJ_TYPE | MARKEROBJ_TYPE))
+		return 0;
+
+	mem = kmem_cache_alloc(memobjs_cache, GFP_KERNEL);
+	if (mem == NULL)
+		return -ENOMEM;
+
+	mem->gpuaddr = gpuaddr;
+	mem->size = size;
+	mem->priv = 0;
+	mem->id = 0;
+	mem->offset = 0;
+	mem->flags = 0;
+
+	if (drawobj->flags & KGSL_DRAWOBJ_MEMLIST &&
+			ibdesc->ctrl & KGSL_IBDESC_MEMLIST)
+		/* add to the memlist */
+		list_add_tail(&mem->node, &cmdobj->memlist);
+	else {
+		/* set the preamble flag if directed to */
+		if (drawobj->context->flags & KGSL_CONTEXT_PREAMBLE &&
+				list_empty(&cmdobj->cmdlist))
+			mem->flags = KGSL_CMDLIST_CTXTSWITCH_PREAMBLE;
+
+		/* add to the cmd list */
+		list_add_tail(&mem->node, &cmdobj->cmdlist);
+	}
+
+	return 0;
+}
+
+static inline int drawobj_init(struct kgsl_device *device,
+	struct kgsl_context *context, struct kgsl_drawobj *drawobj,
+	unsigned int type)
+{
+	/*
+	 * Increase the reference count on the context so it doesn't disappear
+	 * during the lifetime of this object
+	 */
+	if (!_kgsl_context_get(context))
+		return -ENOENT;
+
+	kref_init(&drawobj->refcount);
+
+	drawobj->device = device;
+	drawobj->context = context;
+	drawobj->type = type;
+
+	return 0;
+}
+
+/**
+ * kgsl_drawobj_sync_create() - Create a new sync obj
+ * structure
+ * @device: Pointer to a KGSL device struct
+ * @context: Pointer to a KGSL context struct
+ *
+ * Allocate a new kgsl_drawobj_sync structure
+ */
+struct kgsl_drawobj_sync *kgsl_drawobj_sync_create(struct kgsl_device *device,
+		struct kgsl_context *context)
+{
+	struct kgsl_drawobj_sync *syncobj = kzalloc(sizeof(*syncobj),
+							GFP_KERNEL);
+	if (syncobj == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	if (drawobj_init(device, context, DRAWOBJ(syncobj), SYNCOBJ_TYPE)) {
+		kfree(syncobj);
+		return ERR_PTR(-ENOENT);
+	}
+
+	/* Add a timer to help debug sync deadlocks */
+	setup_timer(&syncobj->timer, syncobj_timer, (unsigned long) syncobj);
+
+	return syncobj;
+}
+
+/**
+ * kgsl_drawobj_cmd_create() - Create a new command obj
+ * structure
+ * @device: Pointer to a KGSL device struct
+ * @context: Pointer to a KGSL context struct
+ * @flags: Flags for the command obj
+ * @type: Type of the cmdobj (MARKER or CMD)
+ *
+ * Allocate a new kgsl_drawobj_cmd structure
+ */
+struct kgsl_drawobj_cmd *kgsl_drawobj_cmd_create(struct kgsl_device *device,
+		struct kgsl_context *context, unsigned int flags,
+		unsigned int type)
+{
+	struct kgsl_drawobj_cmd *cmdobj = kzalloc(sizeof(*cmdobj), GFP_KERNEL);
+	struct kgsl_drawobj *drawobj;
+
+	if (cmdobj == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	type &= CMDOBJ_TYPE | MARKEROBJ_TYPE;
+	if (type == 0) {
+		kfree(cmdobj);
+		return ERR_PTR(-EINVAL);
+	}
+
+	drawobj = DRAWOBJ(cmdobj);
+
+	if (drawobj_init(device, context, drawobj, type)) {
+		kfree(cmdobj);
+		return ERR_PTR(-ENOENT);
+	}
+
+	/* sanitize our flags for drawobjs */
+	drawobj->flags = flags & (KGSL_DRAWOBJ_CTX_SWITCH
+				| KGSL_DRAWOBJ_MARKER
+				| KGSL_DRAWOBJ_END_OF_FRAME
+				| KGSL_DRAWOBJ_PWR_CONSTRAINT
+				| KGSL_DRAWOBJ_MEMLIST
+				| KGSL_DRAWOBJ_PROFILING
+				| KGSL_DRAWOBJ_PROFILING_KTIME);
+
+	INIT_LIST_HEAD(&cmdobj->cmdlist);
+	INIT_LIST_HEAD(&cmdobj->memlist);
+
+	return cmdobj;
+}
+
+#ifdef CONFIG_COMPAT
+static int add_ibdesc_list_compat(struct kgsl_device *device,
+		struct kgsl_drawobj_cmd *cmdobj, void __user *ptr, int count)
+{
+	int i, ret = 0;
+	struct kgsl_ibdesc_compat ibdesc32;
+	struct kgsl_ibdesc ibdesc;
+
+	for (i = 0; i < count; i++) {
+		memset(&ibdesc32, 0, sizeof(ibdesc32));
+
+		if (copy_from_user(&ibdesc32, ptr, sizeof(ibdesc32))) {
+			ret = -EFAULT;
+			break;
+		}
+
+		ibdesc.gpuaddr = (unsigned long) ibdesc32.gpuaddr;
+		ibdesc.sizedwords = (size_t) ibdesc32.sizedwords;
+		ibdesc.ctrl = (unsigned int) ibdesc32.ctrl;
+
+		ret = kgsl_drawobj_cmd_add_ibdesc(device, cmdobj, &ibdesc);
+		if (ret)
+			break;
+
+		ptr += sizeof(ibdesc32);
+	}
+
+	return ret;
+}
+
+static int add_syncpoints_compat(struct kgsl_device *device,
+		struct kgsl_drawobj_sync *syncobj, void __user *ptr, int count)
+{
+	struct kgsl_cmd_syncpoint_compat sync32;
+	struct kgsl_cmd_syncpoint sync;
+	int i, ret = 0;
+
+	for (i = 0; i < count; i++) {
+		memset(&sync32, 0, sizeof(sync32));
+
+		if (copy_from_user(&sync32, ptr, sizeof(sync32))) {
+			ret = -EFAULT;
+			break;
+		}
+
+		sync.type = sync32.type;
+		sync.priv = compat_ptr(sync32.priv);
+		sync.size = (size_t) sync32.size;
+
+		ret = kgsl_drawobj_sync_add_sync(device, syncobj, &sync);
+		if (ret)
+			break;
+
+		ptr += sizeof(sync32);
+	}
+
+	return ret;
+}
+#else
+static int add_ibdesc_list_compat(struct kgsl_device *device,
+		struct kgsl_drawobj_cmd *cmdobj, void __user *ptr, int count)
+{
+	return -EINVAL;
+}
+
+static int add_syncpoints_compat(struct kgsl_device *device,
+		struct kgsl_drawobj_sync *syncobj, void __user *ptr, int count)
+{
+	return -EINVAL;
+}
+#endif
+
+/* Returns:
+ *   -EINVAL: Bad data
+ *   0: All data fields are empty (nothing to do)
+ *   1: All list information is valid
+ */
+static int _verify_input_list(unsigned int count, void __user *ptr,
+		unsigned int size)
+{
+	/* Return early if nothing going on */
+	if (count == 0 && ptr == NULL && size == 0)
+		return 0;
+
+	/* Sanity check inputs */
+	if (count == 0 || ptr == NULL || size == 0)
+		return -EINVAL;
+
+	return 1;
+}
+
+int kgsl_drawobj_cmd_add_ibdesc_list(struct kgsl_device *device,
+		struct kgsl_drawobj_cmd *cmdobj, void __user *ptr, int count)
+{
+	struct kgsl_ibdesc ibdesc;
+	struct kgsl_drawobj *baseobj = DRAWOBJ(cmdobj);
+	int i, ret;
+
+	/* Ignore everything if this is a MARKER */
+	if (baseobj->type & MARKEROBJ_TYPE)
+		return 0;
+
+	ret = _verify_input_list(count, ptr, sizeof(ibdesc));
+	if (ret <= 0)
+		return -EINVAL;
+
+	if (is_compat_task())
+		return add_ibdesc_list_compat(device, cmdobj, ptr, count);
+
+	for (i = 0; i < count; i++) {
+		memset(&ibdesc, 0, sizeof(ibdesc));
+
+		if (copy_from_user(&ibdesc, ptr, sizeof(ibdesc)))
+			return -EFAULT;
+
+		ret = kgsl_drawobj_cmd_add_ibdesc(device, cmdobj, &ibdesc);
+		if (ret)
+			return ret;
+
+		ptr += sizeof(ibdesc);
+	}
+
+	return 0;
+}
+
+int kgsl_drawobj_sync_add_syncpoints(struct kgsl_device *device,
+		struct kgsl_drawobj_sync *syncobj, void __user *ptr, int count)
+{
+	struct kgsl_cmd_syncpoint sync;
+	int i, ret;
+
+	if (count == 0)
+		return 0;
+
+	syncobj->synclist = kcalloc(count,
+		sizeof(struct kgsl_drawobj_sync_event), GFP_KERNEL);
+
+	if (syncobj->synclist == NULL)
+		return -ENOMEM;
+
+	if (is_compat_task())
+		return add_syncpoints_compat(device, syncobj, ptr, count);
+
+	for (i = 0; i < count; i++) {
+		memset(&sync, 0, sizeof(sync));
+
+		if (copy_from_user(&sync, ptr, sizeof(sync)))
+			return -EFAULT;
+
+		ret = kgsl_drawobj_sync_add_sync(device, syncobj, &sync);
+		if (ret)
+			return ret;
+
+		ptr += sizeof(sync);
+	}
+
+	return 0;
+}
+
+static int drawobj_add_object(struct list_head *head,
+		struct kgsl_command_object *obj)
+{
+	struct kgsl_memobj_node *mem;
+
+	mem = kmem_cache_alloc(memobjs_cache, GFP_KERNEL);
+	if (mem == NULL)
+		return -ENOMEM;
+
+	mem->gpuaddr = obj->gpuaddr;
+	mem->size = obj->size;
+	mem->id = obj->id;
+	mem->offset = obj->offset;
+	mem->flags = obj->flags;
+	mem->priv = 0;
+
+	list_add_tail(&mem->node, head);
+	return 0;
+}
+
+#define CMDLIST_FLAGS \
+	(KGSL_CMDLIST_IB | \
+	 KGSL_CMDLIST_CTXTSWITCH_PREAMBLE | \
+	 KGSL_CMDLIST_IB_PREAMBLE)
+
+/* This can only accept MARKEROBJ_TYPE and CMDOBJ_TYPE */
+int kgsl_drawobj_cmd_add_cmdlist(struct kgsl_device *device,
+		struct kgsl_drawobj_cmd *cmdobj, void __user *ptr,
+		unsigned int size, unsigned int count)
+{
+	struct kgsl_command_object obj;
+	struct kgsl_drawobj *baseobj = DRAWOBJ(cmdobj);
+	int i, ret;
+
+	/* Ignore everything if this is a MARKER */
+	if (baseobj->type & MARKEROBJ_TYPE)
+		return 0;
+
+	ret = _verify_input_list(count, ptr, size);
+	if (ret <= 0)
+		return ret;
+
+	for (i = 0; i < count; i++) {
+		memset(&obj, 0, sizeof(obj));
+
+		ret = _copy_from_user(&obj, ptr, sizeof(obj), size);
+		if (ret)
+			return ret;
+
+		/* Sanity check the flags */
+		if (!(obj.flags & CMDLIST_FLAGS)) {
+			KGSL_DRV_ERR(device,
+				"invalid cmdobj ctxt %d flags %d id %d offset %lld addr %lld size %lld\n",
+				baseobj->context->id, obj.flags, obj.id,
+				obj.offset, obj.gpuaddr, obj.size);
+			return -EINVAL;
+		}
+
+		ret = drawobj_add_object(&cmdobj->cmdlist, &obj);
+		if (ret)
+			return ret;
+
+		ptr += sizeof(obj);
+	}
+
+	return 0;
+}
+
+int kgsl_drawobj_cmd_add_memlist(struct kgsl_device *device,
+		struct kgsl_drawobj_cmd *cmdobj, void __user *ptr,
+		unsigned int size, unsigned int count)
+{
+	struct kgsl_command_object obj;
+	struct kgsl_drawobj *baseobj = DRAWOBJ(cmdobj);
+	int i, ret;
+
+	/* Ignore everything if this is a MARKER */
+	if (baseobj->type & MARKEROBJ_TYPE)
+		return 0;
+
+	ret = _verify_input_list(count, ptr, size);
+	if (ret <= 0)
+		return ret;
+
+	for (i = 0; i < count; i++) {
+		memset(&obj, 0, sizeof(obj));
+
+		ret = _copy_from_user(&obj, ptr, sizeof(obj), size);
+		if (ret)
+			return ret;
+
+		if (!(obj.flags & KGSL_OBJLIST_MEMOBJ)) {
+			KGSL_DRV_ERR(device,
+				"invalid memobj ctxt %d flags %d id %d offset %lld addr %lld size %lld\n",
+				DRAWOBJ(cmdobj)->context->id, obj.flags,
+				obj.id, obj.offset, obj.gpuaddr, obj.size);
+			return -EINVAL;
+		}
+
+		if (obj.flags & KGSL_OBJLIST_PROFILE)
+			add_profiling_buffer(device, cmdobj, obj.gpuaddr,
+				obj.size, obj.id, obj.offset);
+		else {
+			ret = drawobj_add_object(&cmdobj->memlist, &obj);
+			if (ret)
+				return ret;
+		}
+
+		ptr += sizeof(obj);
+	}
+
+	return 0;
+}
+
+int kgsl_drawobj_sync_add_synclist(struct kgsl_device *device,
+		struct kgsl_drawobj_sync *syncobj, void __user *ptr,
+		unsigned int size, unsigned int count)
+{
+	struct kgsl_command_syncpoint syncpoint;
+	struct kgsl_cmd_syncpoint sync;
+	int i, ret;
+
+	/* If we are creating a sync obj, missing or malformed data is an error */
+	ret = _verify_input_list(count, ptr, size);
+	if (ret <= 0)
+		return -EINVAL;
+
+	syncobj->synclist = kcalloc(count,
+		sizeof(struct kgsl_drawobj_sync_event), GFP_KERNEL);
+
+	if (syncobj->synclist == NULL)
+		return -ENOMEM;
+
+	for (i = 0; i < count; i++) {
+		memset(&syncpoint, 0, sizeof(syncpoint));
+
+		ret = _copy_from_user(&syncpoint, ptr, sizeof(syncpoint), size);
+		if (ret)
+			return ret;
+
+		sync.type = syncpoint.type;
+		sync.priv = to_user_ptr(syncpoint.priv);
+		sync.size = syncpoint.size;
+
+		ret = kgsl_drawobj_sync_add_sync(device, syncobj, &sync);
+		if (ret)
+			return ret;
+
+		ptr += sizeof(syncpoint);
+	}
+
+	return 0;
+}
+
+void kgsl_drawobj_exit(void)
+{
+	if (memobjs_cache != NULL)
+		kmem_cache_destroy(memobjs_cache);
+}
+
+int kgsl_drawobj_init(void)
+{
+	memobjs_cache = KMEM_CACHE(kgsl_memobj_node, 0);
+	if (memobjs_cache == NULL) {
+		KGSL_CORE_ERR("failed to create memobjs_cache");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
diff --git a/drivers/gpu/msm/kgsl_drawobj.h b/drivers/gpu/msm/kgsl_drawobj.h
new file mode 100644
index 0000000..89ed944
--- /dev/null
+++ b/drivers/gpu/msm/kgsl_drawobj.h
@@ -0,0 +1,198 @@
+/* Copyright (c) 2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef __KGSL_DRAWOBJ_H
+#define __KGSL_DRAWOBJ_H
+
+#define DRAWOBJ(obj) (&obj->base)
+#define SYNCOBJ(obj) \
+	container_of(obj, struct kgsl_drawobj_sync, base)
+#define CMDOBJ(obj) \
+	container_of(obj, struct kgsl_drawobj_cmd, base)
+
+#define CMDOBJ_TYPE     BIT(0)
+#define MARKEROBJ_TYPE  BIT(1)
+#define SYNCOBJ_TYPE    BIT(2)
+
+/**
+ * struct kgsl_drawobj - KGSL drawobj descriptor
+ * @device: KGSL GPU device that the command was created for
+ * @context: KGSL context that created the command
+ * @type: Object type
+ * @timestamp: Timestamp assigned to the command
+ * @flags: flags
+ * @refcount: kref structure to maintain the reference count
+ */
+struct kgsl_drawobj {
+	struct kgsl_device *device;
+	struct kgsl_context *context;
+	uint32_t type;
+	uint32_t timestamp;
+	unsigned long flags;
+	struct kref refcount;
+};
+
+/**
+ * struct kgsl_drawobj_cmd - KGSL command obj. This also covers markers, a
+ * special form of command obj that does not need its commands executed.
+ * @base: Base kgsl_drawobj
+ * @priv: Internal flags
+ * @global_ts: The ringbuffer timestamp corresponding to this command obj
+ * @fault_policy: Internal policy describing how to handle this command in case
+ * of a fault
+ * @fault_recovery: Recovery actions actually tried for this command obj
+ * @cmdlist: List of IBs to issue
+ * @memlist: List of all memory used in this command obj
+ * @marker_timestamp: For markers, the timestamp of the last "real" command that
+ * was queued
+ * @profiling_buf_entry: Mem entry containing the profiling buffer
+ * @profiling_buffer_gpuaddr: GPU virt address of the profiling buffer, added
+ * here for easy access
+ * @profile_index: Index to store the start/stop ticks in the kernel profiling
+ * buffer
+ * @submit_ticks: Variable to hold ticks at the time of command obj submit
+ */
+struct kgsl_drawobj_cmd {
+	struct kgsl_drawobj base;
+	unsigned long priv;
+	unsigned int global_ts;
+	unsigned long fault_policy;
+	unsigned long fault_recovery;
+	struct list_head cmdlist;
+	struct list_head memlist;
+	unsigned int marker_timestamp;
+	struct kgsl_mem_entry *profiling_buf_entry;
+	uint64_t profiling_buffer_gpuaddr;
+	unsigned int profile_index;
+	uint64_t submit_ticks;
+};
+
+/**
+ * struct kgsl_drawobj_sync - KGSL sync object
+ * @base: Base kgsl_drawobj, this needs to be the first entry
+ * @synclist: Array of context/timestamp tuples to wait for before issuing
+ * @numsyncs: Number of sync entries in the array
+ * @pending: Bitmask of sync events that are active
+ * @timer: a timer used to track possible sync timeouts for this
+ *         sync obj
+ * @timeout_jiffies: The jiffies value at which the timer will expire
+ */
+struct kgsl_drawobj_sync {
+	struct kgsl_drawobj base;
+	struct kgsl_drawobj_sync_event *synclist;
+	unsigned int numsyncs;
+	unsigned long pending;
+	struct timer_list timer;
+	unsigned long timeout_jiffies;
+};
+
+/**
+ * struct kgsl_drawobj_sync_event - sync point tracked by a sync obj
+ * @id: Identifier (position within the pending bitmap)
+ * @type: Syncpoint type
+ * @syncobj: Pointer to the syncobj that owns the sync event
+ * @context: KGSL context for whose timestamp we want to
+ *           register this event
+ * @timestamp: Pending timestamp for the event
+ * @handle: Pointer to a sync fence handle
+ * @device: Pointer to the KGSL device
+ */
+struct kgsl_drawobj_sync_event {
+	unsigned int id;
+	int type;
+	struct kgsl_drawobj_sync *syncobj;
+	struct kgsl_context *context;
+	unsigned int timestamp;
+	struct kgsl_sync_fence_waiter *handle;
+	struct kgsl_device *device;
+};
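Illustrative standalone sketch (editorial addition, not part of this patch):
the downcast pattern behind the DRAWOBJ()/SYNCOBJ()/CMDOBJ() macros above.
Generic code passes the embedded base object around and recovers the
containing object with container_of(); the miniature below reproduces that
with offsetof() so it compiles and runs on its own.

#include <stddef.h>
#include <stdio.h>

struct base_obj { unsigned int type; };			/* stands in for kgsl_drawobj */
struct cmd_obj  { struct base_obj base; int payload; };	/* stands in for kgsl_drawobj_cmd */

/* Same recovery trick as CMDOBJ(): subtract the offset of the embedded base */
#define TO_CMD_OBJ(ptr) \
	((struct cmd_obj *)((char *)(ptr) - offsetof(struct cmd_obj, base)))

int main(void)
{
	struct cmd_obj cmd = { .base = { .type = 1 }, .payload = 42 };
	struct base_obj *base = &cmd.base;	/* what generic code sees */

	printf("payload = %d\n", TO_CMD_OBJ(base)->payload);	/* prints 42 */
	return 0;
}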
+
+#define KGSL_DRAWOBJ_FLAGS \
+	{ KGSL_DRAWOBJ_MARKER, "MARKER" }, \
+	{ KGSL_DRAWOBJ_CTX_SWITCH, "CTX_SWITCH" }, \
+	{ KGSL_DRAWOBJ_SYNC, "SYNC" }, \
+	{ KGSL_DRAWOBJ_END_OF_FRAME, "EOF" }, \
+	{ KGSL_DRAWOBJ_PWR_CONSTRAINT, "PWR_CONSTRAINT" }, \
+	{ KGSL_DRAWOBJ_SUBMIT_IB_LIST, "IB_LIST" }
+
+/**
+ * enum kgsl_drawobj_cmd_priv - Internal command obj flags
+ * @CMDOBJ_SKIP: Skip the entire command obj
+ * @CMDOBJ_FORCE_PREAMBLE: Force the preamble on for the command obj
+ * @CMDOBJ_WFI: Force wait-for-idle for the submission
+ * @CMDOBJ_PROFILE: Store the start / retire ticks for the command obj in the
+ * profiling buffer
+ */
+enum kgsl_drawobj_cmd_priv {
+	CMDOBJ_SKIP = 0,
+	CMDOBJ_FORCE_PREAMBLE,
+	CMDOBJ_WFI,
+	CMDOBJ_PROFILE,
+};
+
+struct kgsl_drawobj_cmd *kgsl_drawobj_cmd_create(struct kgsl_device *device,
+		struct kgsl_context *context, unsigned int flags,
+		unsigned int type);
+int kgsl_drawobj_cmd_add_ibdesc(struct kgsl_device *device,
+		struct kgsl_drawobj_cmd *cmdobj, struct kgsl_ibdesc *ibdesc);
+int kgsl_drawobj_cmd_add_ibdesc_list(struct kgsl_device *device,
+		struct kgsl_drawobj_cmd *cmdobj, void __user *ptr, int count);
+int kgsl_drawobj_cmd_add_cmdlist(struct kgsl_device *device,
+		struct kgsl_drawobj_cmd *cmdobj, void __user *ptr,
+		unsigned int size, unsigned int count);
+int kgsl_drawobj_cmd_add_memlist(struct kgsl_device *device,
+		struct kgsl_drawobj_cmd *cmdobj, void __user *ptr,
+		unsigned int size, unsigned int count);
+
+struct kgsl_drawobj_sync *kgsl_drawobj_sync_create(struct kgsl_device *device,
+		struct kgsl_context *context);
+int kgsl_drawobj_sync_add_syncpoints(struct kgsl_device *device,
+		struct kgsl_drawobj_sync *syncobj, void __user *ptr,
+		int count);
+int kgsl_drawobj_sync_add_synclist(struct kgsl_device *device,
+		struct kgsl_drawobj_sync *syncobj, void __user *ptr,
+		unsigned int size, unsigned int count);
+int kgsl_drawobj_sync_add_sync(struct kgsl_device *device,
+		struct kgsl_drawobj_sync *syncobj,
+		struct kgsl_cmd_syncpoint *sync);
+
+int kgsl_drawobj_init(void);
+void kgsl_drawobj_exit(void);
+
+void kgsl_dump_syncpoints(struct kgsl_device *device,
+	struct kgsl_drawobj_sync *syncobj);
+
+void kgsl_drawobj_destroy(struct kgsl_drawobj *drawobj);
+
+static inline bool kgsl_drawobj_events_pending(
+		struct kgsl_drawobj_sync *syncobj)
+{
+	return !bitmap_empty(&syncobj->pending, KGSL_MAX_SYNCPOINTS);
+}
+
+static inline bool kgsl_drawobj_event_pending(
+		struct kgsl_drawobj_sync *syncobj, unsigned int bit)
+{
+	if (bit >= KGSL_MAX_SYNCPOINTS)
+		return false;
+
+	return test_bit(bit, &syncobj->pending);
+}
+#endif /* __KGSL_DRAWOBJ_H */
diff --git a/drivers/gpu/msm/kgsl_events.c b/drivers/gpu/msm/kgsl_events.c
new file mode 100644
index 0000000..d042f05
--- /dev/null
+++ b/drivers/gpu/msm/kgsl_events.c
@@ -0,0 +1,456 @@
+/* Copyright (c) 2011-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/list.h>
+#include <linux/workqueue.h>
+#include <linux/debugfs.h>
+#include <kgsl_device.h>
+
+#include "kgsl_debugfs.h"
+#include "kgsl_trace.h"
+
+/*
+ * Define a kmem cache for the event structures since we allocate and free them
+ * so frequently
+ */
+static struct kmem_cache *events_cache;
+static struct dentry *events_dentry;
+
+static inline void signal_event(struct kgsl_device *device,
+		struct kgsl_event *event, int result)
+{
+	list_del(&event->node);
+	event->result = result;
+	queue_work(device->events_wq, &event->work);
+}
+
+/**
+ * _kgsl_event_worker() - Work handler for processing GPU event callbacks
+ * @work: Pointer to the work_struct for the event
+ *
+ * Each event callback has its own work struct and is run on an event-specific
+ * workqueue.  This is the worker that calls the event callback function.
+ */
+static void _kgsl_event_worker(struct work_struct *work)
+{
+	struct kgsl_event *event = container_of(work, struct kgsl_event, work);
+	int id = KGSL_CONTEXT_ID(event->context);
+
+	trace_kgsl_fire_event(id, event->timestamp, event->result,
+		jiffies - event->created, event->func);
+
+	event->func(event->device, event->group, event->priv, event->result);
+
+	kgsl_context_put(event->context);
+	kmem_cache_free(events_cache, event);
+}
+
+/* return true if the group needs to be processed */
+static bool _do_process_group(unsigned int processed, unsigned int cur)
+{
+	if (processed == cur)
+		return false;
+
+	/*
+	 * This ensures that the timestamp didn't slip back accidentally, maybe
+	 * due to a memory barrier issue. This is highly unlikely but we've
+	 * been burned here in the past.
+	 */
+	if ((cur < processed) && ((processed - cur) < KGSL_TIMESTAMP_WINDOW))
+		return false;
+
+	return true;
+}
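Illustrative standalone sketch (editorial addition, not part of this patch):
the wraparound-aware ordering that timestamp_cmp() is assumed to provide.
timestamp_cmp() and KGSL_TIMESTAMP_WINDOW are defined elsewhere in the driver;
this simplified stand-in only shows why a numerically smaller 32-bit value
just after a rollover still counts as newer, which is the kind of slip the
check above is guarding against.

#include <stdio.h>

/* Simplified stand-in: >0 if a is newer than b, <0 if older, 0 if equal */
static int timestamp_cmp_sketch(unsigned int a, unsigned int b)
{
	int delta = (int)(a - b);	/* signed difference handles the wrap */

	return (delta > 0) - (delta < 0);
}

int main(void)
{
	/* A low timestamp issued just after rollover is still "newer" */
	printf("%d\n", timestamp_cmp_sketch(5u, 0xfffffff0u));	/* prints 1 */
	printf("%d\n", timestamp_cmp_sketch(100u, 200u));	/* prints -1 */
	return 0;
}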
+
+static void _process_event_group(struct kgsl_device *device,
+		struct kgsl_event_group *group, bool flush)
+{
+	struct kgsl_event *event, *tmp;
+	unsigned int timestamp;
+	struct kgsl_context *context;
+
+	if (group == NULL)
+		return;
+
+	context = group->context;
+
+	/*
+	 * Sanity check to be sure that we aren't racing with the context
+	 * getting destroyed
+	 */
+	if (context != NULL && !_kgsl_context_get(context)) {
+		WARN_ON(1);
+		return;
+	}
+
+	spin_lock(&group->lock);
+
+	group->readtimestamp(device, group->priv, KGSL_TIMESTAMP_RETIRED,
+		&timestamp);
+
+	if (!flush && _do_process_group(group->processed, timestamp) == false)
+		goto out;
+
+	list_for_each_entry_safe(event, tmp, &group->events, node) {
+		if (timestamp_cmp(event->timestamp, timestamp) <= 0)
+			signal_event(device, event, KGSL_EVENT_RETIRED);
+		else if (flush)
+			signal_event(device, event, KGSL_EVENT_CANCELLED);
+	}
+
+	group->processed = timestamp;
+
+out:
+	spin_unlock(&group->lock);
+	kgsl_context_put(context);
+}
+
+/**
+ * kgsl_process_event_group() - Handle all the retired events in a group
+ * @device: Pointer to a KGSL device
+ * @group: Pointer to a GPU events group to process
+ */
+
+void kgsl_process_event_group(struct kgsl_device *device,
+		struct kgsl_event_group *group)
+{
+	_process_event_group(device, group, false);
+}
+EXPORT_SYMBOL(kgsl_process_event_group);
+
+/**
+ * kgsl_flush_event_group() - flush all the events in a group by retiring the
+ * ones that can be retired and cancelling the ones that are still pending
+ * @device: Pointer to a KGSL device
+ * @group: Pointer to a GPU events group to process
+ */
+void kgsl_flush_event_group(struct kgsl_device *device,
+		struct kgsl_event_group *group)
+{
+	_process_event_group(device, group, true);
+}
+EXPORT_SYMBOL(kgsl_flush_event_group);
+
+/**
+ * kgsl_cancel_events_timestamp() - Cancel pending events for a given timestamp
+ * @device: Pointer to a KGSL device
+ * @group: Pointer to the GPU event group that owns the event
+ * @timestamp: Registered expiry timestamp for the event
+ */
+void kgsl_cancel_events_timestamp(struct kgsl_device *device,
+		struct kgsl_event_group *group, unsigned int timestamp)
+{
+	struct kgsl_event *event, *tmp;
+
+	spin_lock(&group->lock);
+
+	list_for_each_entry_safe(event, tmp, &group->events, node) {
+		if (timestamp_cmp(timestamp, event->timestamp) == 0)
+			signal_event(device, event, KGSL_EVENT_CANCELLED);
+	}
+
+	spin_unlock(&group->lock);
+}
+EXPORT_SYMBOL(kgsl_cancel_events_timestamp);
+
+/**
+ * kgsl_cancel_events() - Cancel all pending events in the group
+ * @device: Pointer to a KGSL device
+ * @group: Pointer to a kgsl_events_group
+ */
+void kgsl_cancel_events(struct kgsl_device *device,
+		struct kgsl_event_group *group)
+{
+	struct kgsl_event *event, *tmp;
+
+	spin_lock(&group->lock);
+
+	list_for_each_entry_safe(event, tmp, &group->events, node)
+		signal_event(device, event, KGSL_EVENT_CANCELLED);
+
+	spin_unlock(&group->lock);
+}
+EXPORT_SYMBOL(kgsl_cancel_events);
+
+/**
+ * kgsl_cancel_event() - Cancel a specific event from a group
+ * @device: Pointer to a KGSL device
+ * @group: Pointer to the group that contains the events
+ * @timestamp: Registered expiry timestamp for the event
+ * @func: Registered callback for the function
+ * @priv: Registered priv data for the function
+ */
+void kgsl_cancel_event(struct kgsl_device *device,
+		struct kgsl_event_group *group, unsigned int timestamp,
+		kgsl_event_func func, void *priv)
+{
+	struct kgsl_event *event, *tmp;
+
+	spin_lock(&group->lock);
+
+	list_for_each_entry_safe(event, tmp, &group->events, node) {
+		if (timestamp == event->timestamp && func == event->func &&
+			event->priv == priv)
+			signal_event(device, event, KGSL_EVENT_CANCELLED);
+	}
+
+	spin_unlock(&group->lock);
+}
+EXPORT_SYMBOL(kgsl_cancel_event);
+
+/**
+ * kgsl_event_pending() - Searches for an event in an event group
+ * @device: Pointer to a KGSL device
+ * @group: Pointer to the group that contains the events
+ * @timestamp: Registered expiry timestamp for the event
+ * @func: Registered callback for the function
+ * @priv: Registered priv data for the function
+ */
+bool kgsl_event_pending(struct kgsl_device *device,
+		struct kgsl_event_group *group,
+		unsigned int timestamp, kgsl_event_func func, void *priv)
+{
+	struct kgsl_event *event;
+	bool result = false;
+
+	spin_lock(&group->lock);
+	list_for_each_entry(event, &group->events, node) {
+		if (timestamp == event->timestamp && func == event->func &&
+			event->priv == priv) {
+			result = true;
+			break;
+		}
+	}
+	spin_unlock(&group->lock);
+	return result;
+}
+
+/**
+ * kgsl_add_event() - Add a new GPU event to a group
+ * @device: Pointer to a KGSL device
+ * @group: Pointer to the group to add the event to
+ * @timestamp: Timestamp that the event will expire on
+ * @func: Callback function for the event
+ * @priv: Private data to send to the callback function
+ */
+int kgsl_add_event(struct kgsl_device *device, struct kgsl_event_group *group,
+		unsigned int timestamp, kgsl_event_func func, void *priv)
+{
+	unsigned int queued;
+	struct kgsl_context *context = group->context;
+	struct kgsl_event *event;
+	unsigned int retired;
+
+	if (!func)
+		return -EINVAL;
+
+	/*
+	 * If the caller is creating their own timestamps, let them schedule
+	 * events in the future. Otherwise only allow timestamps that have been
+	 * queued.
+	 */
+	if (!context || !(context->flags & KGSL_CONTEXT_USER_GENERATED_TS)) {
+		group->readtimestamp(device, group->priv, KGSL_TIMESTAMP_QUEUED,
+			&queued);
+
+		if (timestamp_cmp(timestamp, queued) > 0)
+			return -EINVAL;
+	}
+
+	event = kmem_cache_alloc(events_cache, GFP_KERNEL);
+	if (event == NULL)
+		return -ENOMEM;
+
+	/* Get a reference to the context while the event is active */
+	if (context != NULL && !_kgsl_context_get(context)) {
+		kmem_cache_free(events_cache, event);
+		return -ENOENT;
+	}
+
+	event->device = device;
+	event->context = context;
+	event->timestamp = timestamp;
+	event->priv = priv;
+	event->func = func;
+	event->created = jiffies;
+	event->group = group;
+
+	INIT_WORK(&event->work, _kgsl_event_worker);
+
+	trace_kgsl_register_event(KGSL_CONTEXT_ID(context), timestamp, func);
+
+	spin_lock(&group->lock);
+
+	/*
+	 * Check to see if the requested timestamp has already retired.  If so,
+	 * schedule the callback right away
+	 */
+	group->readtimestamp(device, group->priv, KGSL_TIMESTAMP_RETIRED,
+		&retired);
+
+	if (timestamp_cmp(retired, timestamp) >= 0) {
+		event->result = KGSL_EVENT_RETIRED;
+		queue_work(device->events_wq, &event->work);
+		spin_unlock(&group->lock);
+		return 0;
+	}
+
+	/* Add the event to the group list */
+	list_add_tail(&event->node, &group->events);
+
+	spin_unlock(&group->lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(kgsl_add_event);
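Illustrative kernel-context sketch (editorial addition, not part of this
patch): registering a one-shot callback with kgsl_add_event() and blocking on
it with a completion. The helper names are hypothetical, <linux/completion.h>
is assumed, and the snippet is not compilable outside the driver tree.

static void example_retire_cb(struct kgsl_device *device,
		struct kgsl_event_group *group, void *priv, int result)
{
	/* result is KGSL_EVENT_RETIRED on retire, KGSL_EVENT_CANCELLED on flush */
	complete((struct completion *) priv);
}

static int example_wait_for_timestamp(struct kgsl_device *device,
		struct kgsl_context *context, unsigned int timestamp)
{
	DECLARE_COMPLETION_ONSTACK(done);
	int ret;

	/* Fires immediately if 'timestamp' has already retired */
	ret = kgsl_add_event(device, &context->events, timestamp,
			example_retire_cb, &done);
	if (ret)
		return ret;	/* e.g. -EINVAL for a not-yet-queued timestamp */

	wait_for_completion(&done);
	return 0;
}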
+
+static DEFINE_RWLOCK(group_lock);
+static LIST_HEAD(group_list);
+
+void kgsl_process_event_groups(struct kgsl_device *device)
+{
+	struct kgsl_event_group *group;
+
+	read_lock(&group_lock);
+	list_for_each_entry(group, &group_list, group)
+		_process_event_group(device, group, false);
+	read_unlock(&group_lock);
+}
+EXPORT_SYMBOL(kgsl_process_event_groups);
+
+/**
+ * kgsl_del_event_group() - Remove a GPU event group
+ * @group: GPU event group to remove
+ */
+void kgsl_del_event_group(struct kgsl_event_group *group)
+{
+	/* Make sure that all the events have been deleted from the list */
+	BUG_ON(!list_empty(&group->events));
+
+	write_lock(&group_lock);
+	list_del(&group->group);
+	write_unlock(&group_lock);
+}
+EXPORT_SYMBOL(kgsl_del_event_group);
+
+/**
+ * kgsl_add_event_group() - Add a new GPU event group
+ * @group: Pointer to the new group to add to the list
+ * @context: Context that owns the group (or NULL for global)
+ * @name: Name of the group
+ * @readtimestamp: Function pointer to the readtimestamp function to call when
+ * processing events
+ * @priv: Priv member to pass to the readtimestamp function
+ */
+void kgsl_add_event_group(struct kgsl_event_group *group,
+		struct kgsl_context *context, const char *name,
+		readtimestamp_func readtimestamp, void *priv)
+{
+	BUG_ON(readtimestamp == NULL);
+
+	spin_lock_init(&group->lock);
+	INIT_LIST_HEAD(&group->events);
+
+	group->context = context;
+	group->readtimestamp = readtimestamp;
+	group->priv = priv;
+
+	if (name)
+		strlcpy(group->name, name, sizeof(group->name));
+
+	write_lock(&group_lock);
+	list_add_tail(&group->group, &group_list);
+	write_unlock(&group_lock);
+}
+EXPORT_SYMBOL(kgsl_add_event_group);
+
+static void events_debugfs_print_group(struct seq_file *s,
+		struct kgsl_event_group *group)
+{
+	struct kgsl_event *event;
+	unsigned int retired;
+
+	spin_lock(&group->lock);
+
+	seq_printf(s, "%s: last=%d\n", group->name, group->processed);
+
+	list_for_each_entry(event, &group->events, node) {
+
+		group->readtimestamp(event->device, group->priv,
+			KGSL_TIMESTAMP_RETIRED, &retired);
+
+		seq_printf(s, "\t%d:%d age=%lu func=%ps [retired=%d]\n",
+			group->context ? group->context->id :
+						KGSL_MEMSTORE_GLOBAL,
+			event->timestamp, jiffies - event->created,
+			event->func, retired);
+	}
+	spin_unlock(&group->lock);
+}
+
+static int events_debugfs_print(struct seq_file *s, void *unused)
+{
+	struct kgsl_event_group *group;
+
+	seq_puts(s, "event groups:\n");
+	seq_puts(s, "--------------\n");
+
+	read_lock(&group_lock);
+	list_for_each_entry(group, &group_list, group) {
+		events_debugfs_print_group(s, group);
+		seq_puts(s, "\n");
+	}
+	read_unlock(&group_lock);
+
+	return 0;
+}
+
+static int events_debugfs_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, events_debugfs_print, NULL);
+}
+
+static const struct file_operations events_fops = {
+	.open = events_debugfs_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+/**
+ * kgsl_events_exit() - Destroy the event kmem cache on module exit
+ */
+void kgsl_events_exit(void)
+{
+	kmem_cache_destroy(events_cache);
+
+	debugfs_remove(events_dentry);
+}
+
+/**
+ * kgsl_events_init() - Create the event kmem cache on module start
+ */
+void __init kgsl_events_init(void)
+{
+	struct dentry *debugfs_dir = kgsl_get_debugfs_dir();
+
+	events_cache = KMEM_CACHE(kgsl_event, 0);
+
+	events_dentry = debugfs_create_file("events", 0444, debugfs_dir, NULL,
+		&events_fops);
+
+	/* Failure to create a debugfs entry is non-fatal */
+	if (IS_ERR(events_dentry))
+		events_dentry = NULL;
+}
diff --git a/drivers/gpu/msm/kgsl_ioctl.c b/drivers/gpu/msm/kgsl_ioctl.c
new file mode 100644
index 0000000..2c57816
--- /dev/null
+++ b/drivers/gpu/msm/kgsl_ioctl.c
@@ -0,0 +1,189 @@
+/* Copyright (c) 2008-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/ioctl.h>
+#include <linux/compat.h>
+#include <linux/uaccess.h>
+#include <linux/fs.h>
+#include "kgsl_device.h"
+#include "kgsl_sync.h"
+
+static const struct kgsl_ioctl kgsl_ioctl_funcs[] = {
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_DEVICE_GETPROPERTY,
+			kgsl_ioctl_device_getproperty),
+	/* IOCTL_KGSL_DEVICE_WAITTIMESTAMP is no longer supported */
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_DEVICE_WAITTIMESTAMP_CTXTID,
+			kgsl_ioctl_device_waittimestamp_ctxtid),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_RINGBUFFER_ISSUEIBCMDS,
+			kgsl_ioctl_rb_issueibcmds),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_SUBMIT_COMMANDS,
+			kgsl_ioctl_submit_commands),
+	/* IOCTL_KGSL_CMDSTREAM_READTIMESTAMP is no longer supported */
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_READTIMESTAMP_CTXTID,
+			kgsl_ioctl_cmdstream_readtimestamp_ctxtid),
+	/* IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP is no longer supported */
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_CMDSTREAM_FREEMEMONTIMESTAMP_CTXTID,
+			kgsl_ioctl_cmdstream_freememontimestamp_ctxtid),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_DRAWCTXT_CREATE,
+			kgsl_ioctl_drawctxt_create),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_DRAWCTXT_DESTROY,
+			kgsl_ioctl_drawctxt_destroy),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_MAP_USER_MEM,
+			kgsl_ioctl_map_user_mem),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_SHAREDMEM_FROM_PMEM,
+			kgsl_ioctl_map_user_mem),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_SHAREDMEM_FREE,
+			kgsl_ioctl_sharedmem_free),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_SHAREDMEM_FLUSH_CACHE,
+			kgsl_ioctl_sharedmem_flush_cache),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_ALLOC,
+			kgsl_ioctl_gpumem_alloc),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_TIMESTAMP_EVENT,
+			kgsl_ioctl_timestamp_event),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_SETPROPERTY,
+			kgsl_ioctl_device_setproperty),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_ALLOC_ID,
+			kgsl_ioctl_gpumem_alloc_id),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_FREE_ID,
+			kgsl_ioctl_gpumem_free_id),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_GET_INFO,
+			kgsl_ioctl_gpumem_get_info),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_SYNC_CACHE,
+			kgsl_ioctl_gpumem_sync_cache),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUMEM_SYNC_CACHE_BULK,
+			kgsl_ioctl_gpumem_sync_cache_bulk),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_SYNCSOURCE_CREATE,
+			kgsl_ioctl_syncsource_create),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_SYNCSOURCE_DESTROY,
+			kgsl_ioctl_syncsource_destroy),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_SYNCSOURCE_CREATE_FENCE,
+			kgsl_ioctl_syncsource_create_fence),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_SYNCSOURCE_SIGNAL_FENCE,
+			kgsl_ioctl_syncsource_signal_fence),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_ALLOC,
+			kgsl_ioctl_gpuobj_alloc),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_FREE,
+			kgsl_ioctl_gpuobj_free),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_INFO,
+			kgsl_ioctl_gpuobj_info),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_IMPORT,
+			kgsl_ioctl_gpuobj_import),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_SYNC,
+			kgsl_ioctl_gpuobj_sync),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_GPU_COMMAND,
+			kgsl_ioctl_gpu_command),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_GPUOBJ_SET_INFO,
+			kgsl_ioctl_gpuobj_set_info),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_SPARSE_PHYS_ALLOC,
+			kgsl_ioctl_sparse_phys_alloc),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_SPARSE_PHYS_FREE,
+			kgsl_ioctl_sparse_phys_free),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_SPARSE_VIRT_ALLOC,
+			kgsl_ioctl_sparse_virt_alloc),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_SPARSE_VIRT_FREE,
+			kgsl_ioctl_sparse_virt_free),
+	KGSL_IOCTL_FUNC(IOCTL_KGSL_SPARSE_BIND,
+			kgsl_ioctl_sparse_bind),
+};
+
+long kgsl_ioctl_copy_in(unsigned int kernel_cmd, unsigned int user_cmd,
+		unsigned long arg, unsigned char *ptr)
+{
+	unsigned int usize = _IOC_SIZE(user_cmd);
+	unsigned int ksize = _IOC_SIZE(kernel_cmd);
+	unsigned int copy = ksize < usize ? ksize : usize;
+
+	if ((kernel_cmd & IOC_IN) && (user_cmd & IOC_IN)) {
+		if (copy > 0 && copy_from_user(ptr, (void __user *) arg, copy))
+			return -EFAULT;
+	}
+
+	return 0;
+}
+
+long kgsl_ioctl_copy_out(unsigned int kernel_cmd, unsigned int user_cmd,
+		unsigned long arg, unsigned char *ptr)
+{
+	unsigned int usize = _IOC_SIZE(user_cmd);
+	unsigned int ksize = _IOC_SIZE(kernel_cmd);
+	unsigned int copy = ksize < usize ? ksize : usize;
+
+	if ((kernel_cmd & IOC_OUT) && (user_cmd & IOC_OUT)) {
+		if (copy > 0 && copy_to_user((void __user *) arg, ptr, copy))
+			return -EFAULT;
+	}
+
+	return 0;
+}
+
+long kgsl_ioctl_helper(struct file *filep, unsigned int cmd, unsigned long arg,
+		const struct kgsl_ioctl *cmds, int len)
+{
+	struct kgsl_device_private *dev_priv = filep->private_data;
+	unsigned char data[128] = { 0 };
+	unsigned int nr = _IOC_NR(cmd);
+	long ret;
+
+	static DEFINE_RATELIMIT_STATE(_rs,
+			DEFAULT_RATELIMIT_INTERVAL,
+			DEFAULT_RATELIMIT_BURST);
+
+	if (nr >= len || cmds[nr].func == NULL)
+		return -ENOIOCTLCMD;
+
+	if (_IOC_SIZE(cmds[nr].cmd) > sizeof(data)) {
+		if (__ratelimit(&_rs))
+			WARN(1, "data too big for ioctl 0x%08X: %d/%ld\n",
+				cmd, _IOC_SIZE(cmds[nr].cmd), sizeof(data));
+		return -EINVAL;
+	}
+
+	if (_IOC_SIZE(cmds[nr].cmd)) {
+		ret = kgsl_ioctl_copy_in(cmds[nr].cmd, cmd, arg, data);
+		if (ret)
+			return ret;
+	}
+
+	ret = cmds[nr].func(dev_priv, cmd, data);
+
+	if (ret == 0 && _IOC_SIZE(cmds[nr].cmd))
+		ret = kgsl_ioctl_copy_out(cmds[nr].cmd, cmd, arg, data);
+
+	return ret;
+}
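Illustrative standalone sketch (editorial addition, not part of this patch):
the size negotiation performed by the copy helpers above. Only the minimum of
the kernel and user sizes moves in either direction, and the zeroed stack
buffer in kgsl_ioctl_helper() means any fields an older userspace did not
supply are seen by the handler as zero. memcpy() stands in for
copy_from_user() here.

#include <stdio.h>
#include <string.h>

struct old_user_arg { unsigned int a; };			/* older userspace layout */
struct new_kernel_arg { unsigned int a; unsigned int b; };	/* current kernel layout */

int main(void)
{
	struct old_user_arg user = { .a = 7 };
	struct new_kernel_arg kernel;
	size_t usize = sizeof(user), ksize = sizeof(kernel);
	size_t copy = ksize < usize ? ksize : usize;	/* same min() as above */

	memset(&kernel, 0, sizeof(kernel));	/* plays the role of data[128] = { 0 } */
	memcpy(&kernel, &user, copy);		/* plays the role of copy_from_user() */

	printf("a=%u b=%u\n", kernel.a, kernel.b);	/* prints a=7 b=0 */
	return 0;
}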
+
+long kgsl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
+{
+	struct kgsl_device_private *dev_priv = filep->private_data;
+	struct kgsl_device *device = dev_priv->device;
+	long ret;
+
+	ret = kgsl_ioctl_helper(filep, cmd, arg, kgsl_ioctl_funcs,
+		ARRAY_SIZE(kgsl_ioctl_funcs));
+
+	/*
+	 * If the command was unrecognized in the generic core, try the device
+	 * specific function
+	 */
+
+	if (ret == -ENOIOCTLCMD) {
+		if (is_compat_task() && device->ftbl->compat_ioctl != NULL)
+			return device->ftbl->compat_ioctl(dev_priv, cmd, arg);
+		else if (device->ftbl->ioctl != NULL)
+			return device->ftbl->ioctl(dev_priv, cmd, arg);
+
+		KGSL_DRV_INFO(device, "invalid ioctl code 0x%08X\n", cmd);
+	}
+
+	return ret;
+}
diff --git a/drivers/gpu/msm/kgsl_iommu.c b/drivers/gpu/msm/kgsl_iommu.c
new file mode 100644
index 0000000..eaf0995
--- /dev/null
+++ b/drivers/gpu/msm/kgsl_iommu.c
@@ -0,0 +1,2650 @@
+/* Copyright (c) 2011-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/types.h>
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/spinlock.h>
+#include <linux/genalloc.h>
+#include <linux/slab.h>
+#include <linux/iommu.h>
+#include <linux/msm_kgsl.h>
+#include <linux/ratelimit.h>
+#include <linux/of_platform.h>
+#include <soc/qcom/scm.h>
+#include <soc/qcom/secure_buffer.h>
+#include <stddef.h>
+#include <linux/compat.h>
+
+#include "kgsl.h"
+#include "kgsl_device.h"
+#include "kgsl_mmu.h"
+#include "kgsl_sharedmem.h"
+#include "kgsl_iommu.h"
+#include "adreno_pm4types.h"
+#include "adreno.h"
+#include "kgsl_trace.h"
+#include "kgsl_pwrctrl.h"
+
+#define _IOMMU_PRIV(_mmu) (&((_mmu)->priv.iommu))
+
+#define ADDR_IN_GLOBAL(_a) \
+	(((_a) >= KGSL_IOMMU_GLOBAL_MEM_BASE) && \
+	 ((_a) < (KGSL_IOMMU_GLOBAL_MEM_BASE + KGSL_IOMMU_GLOBAL_MEM_SIZE)))
+
+static struct kgsl_mmu_pt_ops iommu_pt_ops;
+static bool need_iommu_sync;
+
+const unsigned int kgsl_iommu_reg_list[KGSL_IOMMU_REG_MAX] = {
+	0x0,/* SCTLR */
+	0x20,/* TTBR0 */
+	0x34,/* CONTEXTIDR */
+	0x58,/* FSR */
+	0x60,/* FAR_0 */
+	0x618,/* TLBIALL */
+	0x008,/* RESUME */
+	0x68,/* FSYNR0 */
+	0x6C,/* FSYNR1 */
+	0x7F0,/* TLBSYNC */
+	0x7F4,/* TLBSTATUS */
+};
+
+/*
+ * struct kgsl_iommu_addr_entry - entry in the kgsl_iommu_pt rbtree.
+ * @base: starting virtual address of the entry
+ * @size: size of the entry
+ * @node: the rbtree node
+ */
+struct kgsl_iommu_addr_entry {
+	uint64_t base;
+	uint64_t size;
+	struct rb_node node;
+};
+
+static struct kmem_cache *addr_entry_cache;
+
+/*
+ * There are certain memory allocations (ringbuffer, memstore, etc) that need to
+ * be present at the same address in every pagetable. We call these "global"
+ * pagetable entries. There are relatively few of these and they are mostly
+ * stable (defined at init time) but the actual number of globals can differ
+ * slightly depending on the target and implementation.
+ *
+ * Here we define an array and a simple allocator to keep track of the currently
+ * active global entries. Each entry is assigned a unique address inside of a
+ * MMU implementation specific "global" region. The addresses are assigned
+ * sequentially and never re-used to avoid having to go back and reprogram
+ * existing pagetables. The entire list of active entries is mapped and
+ * unmapped into every new pagetable as it is created and destroyed.
+ *
+ * Because there are relatively few entries and they are defined at boot time we
+ * don't need to go over the top to define a dynamic allocation scheme. It will
+ * be less wasteful to pick a static number with a little bit of growth
+ * potential.
+ */
+
+#define GLOBAL_PT_ENTRIES 32
+
+struct global_pt_entry {
+	struct kgsl_memdesc *memdesc;
+	char name[32];
+};
+
+static struct global_pt_entry global_pt_entries[GLOBAL_PT_ENTRIES];
+static struct kgsl_memdesc *kgsl_global_secure_pt_entry;
+static int global_pt_count;
+uint64_t global_pt_alloc;
+static struct kgsl_memdesc gpu_qdss_desc;
+
+void kgsl_print_global_pt_entries(struct seq_file *s)
+{
+	int i;
+
+	for (i = 0; i < global_pt_count; i++) {
+		struct kgsl_memdesc *memdesc = global_pt_entries[i].memdesc;
+
+		if (memdesc == NULL)
+			continue;
+
+		seq_printf(s, "0x%16.16llX-0x%16.16llX %16llu %s\n",
+			memdesc->gpuaddr, memdesc->gpuaddr + memdesc->size - 1,
+			memdesc->size, global_pt_entries[i].name);
+	}
+}
+
+static void kgsl_iommu_unmap_globals(struct kgsl_pagetable *pagetable)
+{
+	unsigned int i;
+
+	for (i = 0; i < global_pt_count; i++) {
+		if (global_pt_entries[i].memdesc != NULL)
+			kgsl_mmu_unmap(pagetable,
+					global_pt_entries[i].memdesc);
+	}
+}
+
+static int kgsl_iommu_map_globals(struct kgsl_pagetable *pagetable)
+{
+	unsigned int i;
+
+	for (i = 0; i < global_pt_count; i++) {
+		if (global_pt_entries[i].memdesc != NULL) {
+			int ret = kgsl_mmu_map(pagetable,
+					global_pt_entries[i].memdesc);
+
+			if (ret)
+				return ret;
+		}
+	}
+
+	return 0;
+}
+
+static void kgsl_iommu_unmap_global_secure_pt_entry(struct kgsl_pagetable
+								*pagetable)
+{
+	struct kgsl_memdesc *entry = kgsl_global_secure_pt_entry;
+
+	if (entry != NULL)
+		kgsl_mmu_unmap(pagetable, entry);
+
+}
+
+static int kgsl_map_global_secure_pt_entry(struct kgsl_pagetable *pagetable)
+{
+	int ret = 0;
+	struct kgsl_memdesc *entry = kgsl_global_secure_pt_entry;
+
+	if (entry != NULL) {
+		entry->pagetable = pagetable;
+		ret = kgsl_mmu_map(pagetable, entry);
+	}
+	return ret;
+}
+
+static void kgsl_iommu_remove_global(struct kgsl_mmu *mmu,
+		struct kgsl_memdesc *memdesc)
+{
+	int i;
+
+	if (memdesc->gpuaddr == 0 || !(memdesc->priv & KGSL_MEMDESC_GLOBAL))
+		return;
+
+	for (i = 0; i < global_pt_count; i++) {
+		if (global_pt_entries[i].memdesc == memdesc) {
+			memdesc->gpuaddr = 0;
+			memdesc->priv &= ~KGSL_MEMDESC_GLOBAL;
+			global_pt_entries[i].memdesc = NULL;
+			return;
+		}
+	}
+}
+
+static void kgsl_iommu_add_global(struct kgsl_mmu *mmu,
+		struct kgsl_memdesc *memdesc, const char *name)
+{
+	if (memdesc->gpuaddr != 0)
+		return;
+
+	/* Check that we can fit the global allocations */
+	if (WARN_ON(global_pt_count >= GLOBAL_PT_ENTRIES) ||
+		WARN_ON((global_pt_alloc + memdesc->size) >=
+			KGSL_IOMMU_GLOBAL_MEM_SIZE))
+		return;
+
+	memdesc->gpuaddr = KGSL_IOMMU_GLOBAL_MEM_BASE + global_pt_alloc;
+	memdesc->priv |= KGSL_MEMDESC_GLOBAL;
+	global_pt_alloc += memdesc->size;
+
+	global_pt_entries[global_pt_count].memdesc = memdesc;
+	strlcpy(global_pt_entries[global_pt_count].name, name,
+			sizeof(global_pt_entries[global_pt_count].name));
+	global_pt_count++;
+}
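Illustrative standalone sketch (editorial addition, not part of this patch):
the "assign sequentially, never reuse" scheme described in the comment block
above, stripped of the memdesc bookkeeping done by kgsl_iommu_add_global().
The base and size constants are placeholders, not the driver's real values.

#include <stdint.h>
#include <stdio.h>

#define EXAMPLE_GLOBAL_BASE	0x100000000ULL		/* placeholder for KGSL_IOMMU_GLOBAL_MEM_BASE */
#define EXAMPLE_GLOBAL_SIZE	(8 * 1024 * 1024)	/* placeholder for KGSL_IOMMU_GLOBAL_MEM_SIZE */

static uint64_t example_alloc;	/* mirrors global_pt_alloc */

/* Bump-allocate a global GPU virtual address; 0 means the region is full */
static uint64_t example_assign_global_va(uint64_t size)
{
	uint64_t addr;

	if (example_alloc + size > EXAMPLE_GLOBAL_SIZE)
		return 0;

	addr = EXAMPLE_GLOBAL_BASE + example_alloc;
	example_alloc += size;	/* addresses are never handed out twice */
	return addr;
}

int main(void)
{
	printf("%#llx\n", (unsigned long long) example_assign_global_va(4096));
	printf("%#llx\n", (unsigned long long) example_assign_global_va(8192));
	return 0;
}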
+
+void kgsl_add_global_secure_entry(struct kgsl_device *device,
+					struct kgsl_memdesc *memdesc)
+{
+	memdesc->gpuaddr = KGSL_IOMMU_SECURE_BASE;
+	kgsl_global_secure_pt_entry = memdesc;
+}
+
+struct kgsl_memdesc *kgsl_iommu_get_qdss_global_entry(void)
+{
+	return &gpu_qdss_desc;
+}
+
+static void kgsl_setup_qdss_desc(struct kgsl_device *device)
+{
+	int result = 0;
+	uint32_t gpu_qdss_entry[2];
+
+	if (!of_find_property(device->pdev->dev.of_node,
+		"qcom,gpu-qdss-stm", NULL))
+		return;
+
+	if (of_property_read_u32_array(device->pdev->dev.of_node,
+				"qcom,gpu-qdss-stm", gpu_qdss_entry, 2)) {
+		KGSL_CORE_ERR("Failed to read gpu qdss dts entry\n");
+		return;
+	}
+
+	gpu_qdss_desc.flags = 0;
+	gpu_qdss_desc.priv = 0;
+	gpu_qdss_desc.physaddr = gpu_qdss_entry[0];
+	gpu_qdss_desc.size = gpu_qdss_entry[1];
+	gpu_qdss_desc.pagetable = NULL;
+	gpu_qdss_desc.ops = NULL;
+	gpu_qdss_desc.dev = device->dev->parent;
+	gpu_qdss_desc.hostptr = NULL;
+
+	result = memdesc_sg_dma(&gpu_qdss_desc, gpu_qdss_desc.physaddr,
+			gpu_qdss_desc.size);
+	if (result) {
+		KGSL_CORE_ERR("memdesc_sg_dma failed: %d\n", result);
+		return;
+	}
+
+	kgsl_mmu_add_global(device, &gpu_qdss_desc, "gpu-qdss");
+}
+
+static inline void kgsl_cleanup_qdss_desc(struct kgsl_mmu *mmu)
+{
+	kgsl_iommu_remove_global(mmu, &gpu_qdss_desc);
+	kgsl_sharedmem_free(&gpu_qdss_desc);
+}
+
+static inline void _iommu_sync_mmu_pc(bool lock)
+{
+	if (need_iommu_sync == false)
+		return;
+
+	if (lock)
+		mutex_lock(&kgsl_mmu_sync);
+	else
+		mutex_unlock(&kgsl_mmu_sync);
+}
+
+static void _detach_pt(struct kgsl_iommu_pt *iommu_pt,
+			  struct kgsl_iommu_context *ctx)
+{
+	if (iommu_pt->attached) {
+		_iommu_sync_mmu_pc(true);
+		iommu_detach_device(iommu_pt->domain, ctx->dev);
+		_iommu_sync_mmu_pc(false);
+		iommu_pt->attached = false;
+	}
+}
+
+static int _attach_pt(struct kgsl_iommu_pt *iommu_pt,
+			struct kgsl_iommu_context *ctx)
+{
+	int ret;
+
+	if (iommu_pt->attached)
+		return 0;
+
+	_iommu_sync_mmu_pc(true);
+	ret = iommu_attach_device(iommu_pt->domain, ctx->dev);
+	_iommu_sync_mmu_pc(false);
+
+	if (ret == 0)
+		iommu_pt->attached = true;
+
+	return ret;
+}
+
+static int _lock_if_secure_mmu(struct kgsl_memdesc *memdesc,
+		struct kgsl_mmu *mmu)
+{
+	struct kgsl_device *device = KGSL_MMU_DEVICE(mmu);
+
+	if (!kgsl_memdesc_is_secured(memdesc))
+		return 0;
+
+	if (!kgsl_mmu_is_secured(mmu))
+		return -EINVAL;
+
+	mutex_lock(&device->mutex);
+	if (kgsl_active_count_get(device)) {
+		mutex_unlock(&device->mutex);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void _unlock_if_secure_mmu(struct kgsl_memdesc *memdesc,
+		struct kgsl_mmu *mmu)
+{
+	struct kgsl_device *device = KGSL_MMU_DEVICE(mmu);
+
+	if (!kgsl_memdesc_is_secured(memdesc) || !kgsl_mmu_is_secured(mmu))
+		return;
+
+	kgsl_active_count_put(device);
+	mutex_unlock(&device->mutex);
+}
+
+static int _iommu_map_sync_pc(struct kgsl_pagetable *pt,
+		struct kgsl_memdesc *memdesc,
+		uint64_t gpuaddr, phys_addr_t physaddr,
+		uint64_t size, unsigned int flags)
+{
+	struct kgsl_iommu_pt *iommu_pt = pt->priv;
+	int ret;
+
+	ret = _lock_if_secure_mmu(memdesc, pt->mmu);
+	if (ret)
+		return ret;
+
+	_iommu_sync_mmu_pc(true);
+
+	ret = iommu_map(iommu_pt->domain, gpuaddr, physaddr, size, flags);
+
+	_iommu_sync_mmu_pc(false);
+
+	_unlock_if_secure_mmu(memdesc, pt->mmu);
+
+	if (ret) {
+		KGSL_CORE_ERR("map err: 0x%016llX, 0x%llx, 0x%x, %d\n",
+			gpuaddr, size, flags, ret);
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+static int _iommu_unmap_sync_pc(struct kgsl_pagetable *pt,
+		struct kgsl_memdesc *memdesc, uint64_t addr, uint64_t size)
+{
+	struct kgsl_iommu_pt *iommu_pt = pt->priv;
+	size_t unmapped = 0;
+	int ret;
+
+	ret = _lock_if_secure_mmu(memdesc, pt->mmu);
+	if (ret)
+		return ret;
+
+	_iommu_sync_mmu_pc(true);
+
+	unmapped = iommu_unmap(iommu_pt->domain, addr, size);
+
+	_iommu_sync_mmu_pc(false);
+
+	_unlock_if_secure_mmu(memdesc, pt->mmu);
+
+	if (unmapped != size) {
+		KGSL_CORE_ERR("unmap err: 0x%016llx, 0x%llx, %zd\n",
+			addr, size, unmapped);
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+static int _iommu_map_sg_offset_sync_pc(struct kgsl_pagetable *pt,
+		uint64_t addr, struct kgsl_memdesc *memdesc,
+		struct scatterlist *sg, int nents,
+		uint64_t offset, uint64_t size, unsigned int flags)
+{
+	struct kgsl_iommu_pt *iommu_pt = pt->priv;
+	uint64_t offset_tmp = offset;
+	uint64_t size_tmp = size;
+	size_t mapped = 0;
+	unsigned int i;
+	struct scatterlist *s;
+	phys_addr_t physaddr;
+	int ret;
+
+	ret = _lock_if_secure_mmu(memdesc, pt->mmu);
+	if (ret)
+		return ret;
+
+	_iommu_sync_mmu_pc(true);
+
+	for_each_sg(sg, s, nents, i) {
+		/* Iterate until we find the offset */
+		if (offset_tmp >= s->length) {
+			offset_tmp -= s->length;
+			continue;
+		}
+
+		/* How much mapping is needed in this sg? */
+		if (size < s->length - offset_tmp)
+			size_tmp = size;
+		else
+			size_tmp = s->length - offset_tmp;
+
+		/* Get the phys addr for the offset page */
+		if (offset_tmp != 0) {
+			physaddr = page_to_phys(nth_page(sg_page(s),
+					offset_tmp >> PAGE_SHIFT));
+			/* Reset offset_tmp */
+			offset_tmp = 0;
+		} else
+			physaddr = page_to_phys(sg_page(s));
+
+		/* Do the map for this sg */
+		ret = iommu_map(iommu_pt->domain, addr + mapped,
+				physaddr, size_tmp, flags);
+		if (ret)
+			break;
+
+		mapped += size_tmp;
+		size -= size_tmp;
+
+		if (size == 0)
+			break;
+	}
+
+	_iommu_sync_mmu_pc(false);
+
+	_unlock_if_secure_mmu(memdesc, pt->mmu);
+
+	if (size != 0) {
+		/* Cleanup on error */
+		_iommu_unmap_sync_pc(pt, memdesc, addr, mapped);
+		KGSL_CORE_ERR(
+			"map sg offset err: 0x%016llX, %d, %x, %zd\n",
+			addr, nents, flags, mapped);
+		return  -ENODEV;
+	}
+
+	return 0;
+}
+
+static int _iommu_map_sg_sync_pc(struct kgsl_pagetable *pt,
+		uint64_t addr, struct kgsl_memdesc *memdesc,
+		struct scatterlist *sg, int nents,
+		unsigned int flags)
+{
+	struct kgsl_iommu_pt *iommu_pt = pt->priv;
+	size_t mapped;
+	int ret;
+
+	ret = _lock_if_secure_mmu(memdesc, pt->mmu);
+	if (ret)
+		return ret;
+
+	_iommu_sync_mmu_pc(true);
+
+	mapped = iommu_map_sg(iommu_pt->domain, addr, sg, nents, flags);
+
+	_iommu_sync_mmu_pc(false);
+
+	_unlock_if_secure_mmu(memdesc, pt->mmu);
+
+	if (mapped == 0) {
+		KGSL_CORE_ERR("map sg err: 0x%016llX, %d, %x, %zd\n",
+			addr, nents, flags, mapped);
+		return  -ENODEV;
+	}
+
+	return 0;
+}
+
+/*
+ * One page allocation for a guard region to protect against over-zealous
+ * GPU pre-fetch
+ */
+
+static struct page *kgsl_guard_page;
+static struct kgsl_memdesc kgsl_secure_guard_page_memdesc;
+
+/*
+ * The dummy page is a placeholder/extra page to be used for sparse mappings.
+ * This page will be mapped to all virtual sparse bindings that are not
+ * physically backed.
+ */
+static struct page *kgsl_dummy_page;
+
+/*
+ * These functions find the nearest allocated memory entries on either side
+ * of a faulting address. If we know the nearby allocations we can make a
+ * better guess about what should have been located in the faulting region.
+ */
+
+/*
+ * A local structure to make it easy to store the interesting bits for the
+ * memory entries on either side of the faulting address
+ */
+
+struct _mem_entry {
+	uint64_t gpuaddr;
+	uint64_t size;
+	uint64_t flags;
+	unsigned int priv;
+	int pending_free;
+	pid_t pid;
+	char name[32];
+};
+
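+/*
+ * Scan the global pagetable entries for the allocations that most closely
+ * precede and follow the faulting address and record their details.
+ */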
+static void _get_global_entries(uint64_t faultaddr,
+		struct _mem_entry *prev,
+		struct _mem_entry *next)
+{
+	int i;
+	uint64_t prevaddr = 0;
+	struct global_pt_entry *p = NULL;
+
+	uint64_t nextaddr = (uint64_t) -1;
+	struct global_pt_entry *n = NULL;
+
+	for (i = 0; i < global_pt_count; i++) {
+		uint64_t addr;
+
+		if (global_pt_entries[i].memdesc == NULL)
+			continue;
+
+		addr = global_pt_entries[i].memdesc->gpuaddr;
+		if ((addr < faultaddr) && (addr > prevaddr)) {
+			prevaddr = addr;
+			p = &global_pt_entries[i];
+		}
+
+		if ((addr > faultaddr) && (addr < nextaddr)) {
+			nextaddr = addr;
+			n = &global_pt_entries[i];
+		}
+	}
+
+	if (p != NULL) {
+		prev->gpuaddr = p->memdesc->gpuaddr;
+		prev->size = p->memdesc->size;
+		prev->flags = p->memdesc->flags;
+		prev->priv = p->memdesc->priv;
+		prev->pid = 0;
+		strlcpy(prev->name, p->name, sizeof(prev->name));
+	}
+
+	if (n != NULL) {
+		next->gpuaddr = n->memdesc->gpuaddr;
+		next->size = n->memdesc->size;
+		next->flags = n->memdesc->flags;
+		next->priv = n->memdesc->priv;
+		next->pid = 0;
+		strlcpy(next->name, n->name, sizeof(next->name));
+	}
+}
+
+void __kgsl_get_memory_usage(struct _mem_entry *entry)
+{
+	kgsl_get_memory_usage(entry->name, sizeof(entry->name), entry->flags);
+}
+
+static void _get_entries(struct kgsl_process_private *private,
+		uint64_t faultaddr, struct _mem_entry *prev,
+		struct _mem_entry *next)
+{
+	int id;
+	struct kgsl_mem_entry *entry;
+
+	uint64_t prevaddr = 0;
+	struct kgsl_mem_entry *p = NULL;
+
+	uint64_t nextaddr = (uint64_t) -1;
+	struct kgsl_mem_entry *n = NULL;
+
+	idr_for_each_entry(&private->mem_idr, entry, id) {
+		uint64_t addr = entry->memdesc.gpuaddr;
+
+		if ((addr < faultaddr) && (addr > prevaddr)) {
+			prevaddr = addr;
+			p = entry;
+		}
+
+		if ((addr > faultaddr) && (addr < nextaddr)) {
+			nextaddr = addr;
+			n = entry;
+		}
+	}
+
+	if (p != NULL) {
+		prev->gpuaddr = p->memdesc.gpuaddr;
+		prev->size = p->memdesc.size;
+		prev->flags = p->memdesc.flags;
+		prev->priv = p->memdesc.priv;
+		prev->pending_free = p->pending_free;
+		prev->pid = private->pid;
+		__kgsl_get_memory_usage(prev);
+	}
+
+	if (n != NULL) {
+		next->gpuaddr = n->memdesc.gpuaddr;
+		next->size = n->memdesc.size;
+		next->flags = n->memdesc.flags;
+		next->priv = n->memdesc.priv;
+		next->pending_free = n->pending_free;
+		next->pid = private->pid;
+		__kgsl_get_memory_usage(next);
+	}
+}
+
+static void _find_mem_entries(struct kgsl_mmu *mmu, uint64_t faultaddr,
+		struct _mem_entry *preventry, struct _mem_entry *nextentry,
+		struct kgsl_context *context)
+{
+	struct kgsl_process_private *private;
+
+	memset(preventry, 0, sizeof(*preventry));
+	memset(nextentry, 0, sizeof(*nextentry));
+
+	/* Set the maximum possible size as an initial value */
+	nextentry->gpuaddr = (uint64_t) -1;
+
+	if (ADDR_IN_GLOBAL(faultaddr)) {
+		_get_global_entries(faultaddr, preventry, nextentry);
+	} else if (context) {
+		private = context->proc_priv;
+		spin_lock(&private->mem_lock);
+		_get_entries(private, faultaddr, preventry, nextentry);
+		spin_unlock(&private->mem_lock);
+	}
+}
+
+static void _print_entry(struct kgsl_device *device, struct _mem_entry *entry)
+{
+	KGSL_LOG_DUMP(device,
+		"[%016llX - %016llX] %s %s (pid = %d) (%s)\n",
+		entry->gpuaddr,
+		entry->gpuaddr + entry->size,
+		entry->priv & KGSL_MEMDESC_GUARD_PAGE ? "(+guard)" : "",
+		entry->pending_free ? "(pending free)" : "",
+		entry->pid, entry->name);
+}
+
+static void _check_if_freed(struct kgsl_iommu_context *ctx,
+	uint64_t addr, pid_t ptname)
+{
+	uint64_t gpuaddr = addr;
+	uint64_t size = 0;
+	uint64_t flags = 0;
+	pid_t pid;
+
+	char name[32];
+
+	memset(name, 0, sizeof(name));
+
+	if (kgsl_memfree_find_entry(ptname, &gpuaddr, &size, &flags, &pid)) {
+		kgsl_get_memory_usage(name, sizeof(name) - 1, flags);
+		KGSL_LOG_DUMP(ctx->kgsldev, "---- premature free ----\n");
+		KGSL_LOG_DUMP(ctx->kgsldev,
+			"[%8.8llX-%8.8llX] (%s) was already freed by pid %d\n",
+			gpuaddr, gpuaddr + size, name, pid);
+	}
+}
+
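+/*
+ * Check whether the faulting address falls within 64 bytes of the end of any
+ * of the process's allocations, which indicates a likely UCHE overfetch.
+ */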
+static bool
+kgsl_iommu_uche_overfetch(struct kgsl_process_private *private,
+		uint64_t faultaddr)
+{
+	int id;
+	struct kgsl_mem_entry *entry = NULL;
+
+	spin_lock(&private->mem_lock);
+	idr_for_each_entry(&private->mem_idr, entry, id) {
+		struct kgsl_memdesc *m = &entry->memdesc;
+
+		if ((faultaddr >= (m->gpuaddr + m->size))
+				&& (faultaddr < (m->gpuaddr + m->size + 64))) {
+			spin_unlock(&private->mem_lock);
+			return true;
+		}
+	}
+	spin_unlock(&private->mem_lock);
+	return false;
+}
+
+/*
+ * Read pagefaults where the faulting address lies within the first 64 bytes
+ * of a page (the UCHE line size is 64 bytes) and the faulting page is
+ * preceded by a valid allocation are likely caused by UCHE overfetch and
+ * are therefore suppressed.
+ */
+
+static bool kgsl_iommu_suppress_pagefault(uint64_t faultaddr, int write,
+					struct kgsl_context *context)
+{
+	/*
+	 * If there is no context associated with the pagefault then this
+	 * could be a fault on a global buffer. We do not suppress faults
+	 * on global buffers as they are mainly accessed by the CP bypassing
+	 * the UCHE. Also, write pagefaults are never suppressed.
+	 */
+	if (!context || write)
+		return false;
+
+	return kgsl_iommu_uche_overfetch(context->proc_priv, faultaddr);
+}
+
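+/*
+ * Pagefault handler registered with the IOMMU driver for KGSL context banks.
+ * It logs the fault details and nearby allocations and, when the GPUHALT
+ * fault policy is set for a stalled transaction, keeps the transaction
+ * stalled by returning -EBUSY and schedules the dispatcher for recovery.
+ */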
+static int kgsl_iommu_fault_handler(struct iommu_domain *domain,
+	struct device *dev, unsigned long addr, int flags, void *token)
+{
+	int ret = 0;
+	struct kgsl_pagetable *pt = token;
+	struct kgsl_mmu *mmu = pt->mmu;
+	struct kgsl_iommu *iommu;
+	struct kgsl_iommu_context *ctx;
+	u64 ptbase;
+	u32 contextidr;
+	pid_t tid = 0;
+	pid_t ptname;
+	struct _mem_entry prev, next;
+	int write;
+	struct kgsl_device *device;
+	struct adreno_device *adreno_dev;
+	unsigned int no_page_fault_log = 0;
+	unsigned int curr_context_id = 0;
+	struct kgsl_context *context;
+	char *fault_type = "unknown";
+
+	static DEFINE_RATELIMIT_STATE(_rs,
+					DEFAULT_RATELIMIT_INTERVAL,
+					DEFAULT_RATELIMIT_BURST);
+
+	if (mmu == NULL)
+		return ret;
+
+	iommu = _IOMMU_PRIV(mmu);
+	ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_USER];
+	device = KGSL_MMU_DEVICE(mmu);
+	adreno_dev = ADRENO_DEVICE(device);
+
+	if (pt->name == KGSL_MMU_SECURE_PT)
+		ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_SECURE];
+
+	/*
+	 * Set the fault state before any printks so that the fault handler
+	 * knows it is dealing with a pagefault. Read the current context id
+	 * from the global memstore slot because we could be in the middle of
+	 * an RB switch and hence the current RB slot may not be reliable,
+	 * but the global one always is.
+	 */
+	kgsl_sharedmem_readl(&device->memstore, &curr_context_id,
+		KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context));
+
+	context = kgsl_context_get(device, curr_context_id);
+
+	write = (flags & IOMMU_FAULT_WRITE) ? 1 : 0;
+	if (flags & IOMMU_FAULT_TRANSLATION)
+		fault_type = "translation";
+	else if (flags & IOMMU_FAULT_PERMISSION)
+		fault_type = "permission";
+
+	if (kgsl_iommu_suppress_pagefault(addr, write, context)) {
+		iommu->pagefault_suppression_count++;
+		kgsl_context_put(context);
+		return ret;
+	}
+
+	if (context != NULL) {
+		/* save pagefault timestamp for GFT */
+		set_bit(KGSL_CONTEXT_PRIV_PAGEFAULT, &context->priv);
+		tid = context->tid;
+	}
+
+	ctx->fault = 1;
+
+	if (test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE,
+		&adreno_dev->ft_pf_policy) &&
+		(flags & IOMMU_FAULT_TRANSACTION_STALLED)) {
+		/*
+		 * Turn off GPU IRQ so we don't get faults from it too.
+		 * The device mutex must be held to change power state
+		 */
+		mutex_lock(&device->mutex);
+		kgsl_pwrctrl_change_state(device, KGSL_STATE_AWARE);
+		mutex_unlock(&device->mutex);
+	}
+
+	ptbase = KGSL_IOMMU_GET_CTX_REG_Q(ctx, TTBR0);
+	contextidr = KGSL_IOMMU_GET_CTX_REG(ctx, CONTEXTIDR);
+
+	ptname = MMU_FEATURE(mmu, KGSL_MMU_GLOBAL_PAGETABLE) ?
+		KGSL_MMU_GLOBAL_PT : tid;
+
+	if (test_bit(KGSL_FT_PAGEFAULT_LOG_ONE_PER_PAGE,
+		&adreno_dev->ft_pf_policy))
+		no_page_fault_log = kgsl_mmu_log_fault_addr(mmu, ptbase, addr);
+
+	if (!no_page_fault_log && __ratelimit(&_rs)) {
+		KGSL_MEM_CRIT(ctx->kgsldev,
+			"GPU PAGE FAULT: addr = %lX pid= %d\n", addr, ptname);
+		KGSL_MEM_CRIT(ctx->kgsldev,
+			"context=%s TTBR0=0x%llx CIDR=0x%x (%s %s fault)\n",
+			ctx->name, ptbase, contextidr,
+			write ? "write" : "read", fault_type);
+
+		/* Don't print the debug if this is a permissions fault */
+		if (!(flags & IOMMU_FAULT_PERMISSION)) {
+			_check_if_freed(ctx, addr, ptname);
+
+			KGSL_LOG_DUMP(ctx->kgsldev,
+				"---- nearby memory ----\n");
+
+			_find_mem_entries(mmu, addr, &prev, &next, context);
+			if (prev.gpuaddr)
+				_print_entry(ctx->kgsldev, &prev);
+			else
+				KGSL_LOG_DUMP(ctx->kgsldev, "*EMPTY*\n");
+
+			KGSL_LOG_DUMP(ctx->kgsldev, " <- fault @ %8.8lX\n",
+				addr);
+
+			if (next.gpuaddr != (uint64_t) -1)
+				_print_entry(ctx->kgsldev, &next);
+			else
+				KGSL_LOG_DUMP(ctx->kgsldev, "*EMPTY*\n");
+		}
+	}
+
+	trace_kgsl_mmu_pagefault(ctx->kgsldev, addr,
+			ptname, write ? "write" : "read");
+
+	/*
+	 * We do not want the h/w to resume fetching data from an iommu
+	 * that has faulted; this is better for debugging as it will stall
+	 * the GPU and trigger a snapshot. Return an EBUSY error.
+	 */
+	if (test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE,
+		&adreno_dev->ft_pf_policy) &&
+		(flags & IOMMU_FAULT_TRANSACTION_STALLED)) {
+		uint32_t sctlr_val;
+
+		ret = -EBUSY;
+		/*
+		 * Disable context fault interrupts
+		 * as we do not clear FSR in the ISR.
+		 * Will be re-enabled after FSR is cleared.
+		 */
+		sctlr_val = KGSL_IOMMU_GET_CTX_REG(ctx, SCTLR);
+		sctlr_val &= ~(0x1 << KGSL_IOMMU_SCTLR_CFIE_SHIFT);
+		KGSL_IOMMU_SET_CTX_REG(ctx, SCTLR, sctlr_val);
+
+		adreno_set_gpu_fault(adreno_dev, ADRENO_IOMMU_PAGE_FAULT);
+		/* Go ahead with recovery */
+		adreno_dispatcher_schedule(device);
+	}
+
+	kgsl_context_put(context);
+	return ret;
+}
+
+/*
+ * kgsl_iommu_disable_clk() - Disable iommu clocks
+ * @mmu - Pointer to mmu structure
+ *
+ * Disable all of the IOMMU clocks
+ */
+static void kgsl_iommu_disable_clk(struct kgsl_mmu *mmu)
+{
+	struct kgsl_iommu *iommu = _IOMMU_PRIV(mmu);
+	int j;
+
+	atomic_dec(&iommu->clk_enable_count);
+
+	/*
+	 * Make sure the clk refcounts are good. An imbalance may
+	 * cause the clocks to be off when we need them on.
+	 */
+	WARN_ON(atomic_read(&iommu->clk_enable_count) < 0);
+
+	for (j = (KGSL_IOMMU_MAX_CLKS - 1); j >= 0; j--)
+		if (iommu->clks[j])
+			clk_disable_unprepare(iommu->clks[j]);
+}
+
+/*
+ * kgsl_iommu_clk_prepare_enable() - Enable the specified IOMMU clock
+ * @clk - Pointer to the clock to enable
+ *
+ * Try 4 times to enable it and then BUG() to facilitate debug
+ */
+static void kgsl_iommu_clk_prepare_enable(struct clk *clk)
+{
+	int num_retries = 4;
+
+	while (num_retries--) {
+		if (!clk_prepare_enable(clk))
+			return;
+	}
+
+	/* Failure is fatal so BUG() to facilitate debug */
+	KGSL_CORE_ERR("IOMMU clock enable failed\n");
+	BUG();
+}
+
+/*
+ * kgsl_iommu_enable_clk() - Enable iommu clocks
+ * @mmu - Pointer to mmu structure
+ *
+ * Enable all of the IOMMU clocks
+ */
+static void kgsl_iommu_enable_clk(struct kgsl_mmu *mmu)
+{
+	int j;
+	struct kgsl_iommu *iommu = _IOMMU_PRIV(mmu);
+
+	for (j = 0; j < KGSL_IOMMU_MAX_CLKS; j++) {
+		if (iommu->clks[j])
+			kgsl_iommu_clk_prepare_enable(iommu->clks[j]);
+	}
+	atomic_inc(&iommu->clk_enable_count);
+}
+
+/* kgsl_iommu_get_ttbr0 - Get TTBR0 setting for a pagetable */
+static u64 kgsl_iommu_get_ttbr0(struct kgsl_pagetable *pt)
+{
+	struct kgsl_iommu_pt *iommu_pt = pt ? pt->priv : NULL;
+
+	BUG_ON(iommu_pt == NULL);
+
+	return iommu_pt->ttbr0;
+}
+
+static bool kgsl_iommu_pt_equal(struct kgsl_mmu *mmu,
+				struct kgsl_pagetable *pt,
+				u64 ttbr0)
+{
+	struct kgsl_iommu_pt *iommu_pt = pt ? pt->priv : NULL;
+	u64 domain_ttbr0;
+
+	if (iommu_pt == NULL)
+		return 0;
+
+	domain_ttbr0 = kgsl_iommu_get_ttbr0(pt);
+
+	return (domain_ttbr0 == ttbr0);
+}
+
+/* kgsl_iommu_get_contextidr - query CONTEXTIDR setting for a pagetable */
+static u32 kgsl_iommu_get_contextidr(struct kgsl_pagetable *pt)
+{
+	struct kgsl_iommu_pt *iommu_pt = pt ? pt->priv : NULL;
+
+	BUG_ON(iommu_pt == NULL);
+
+	return iommu_pt->contextidr;
+}
+
+/*
+ * kgsl_iommu_destroy_pagetable - Free up resources held by a pagetable
+ * @pt - Pointer to the pagetable which is to be freed
+ *
+ * Return - void
+ */
+static void kgsl_iommu_destroy_pagetable(struct kgsl_pagetable *pt)
+{
+	struct kgsl_iommu_pt *iommu_pt = pt->priv;
+	struct kgsl_mmu *mmu = pt->mmu;
+	struct kgsl_iommu *iommu;
+	struct kgsl_iommu_context  *ctx;
+
+	/*
+	 * Make sure all allocations are unmapped before destroying
+	 * the pagetable
+	 */
+	WARN_ON(!list_empty(&pt->list));
+
+	iommu = _IOMMU_PRIV(mmu);
+
+	if (pt->name == KGSL_MMU_SECURE_PT) {
+		ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_SECURE];
+		kgsl_iommu_unmap_global_secure_pt_entry(pt);
+	} else {
+		ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_USER];
+		kgsl_iommu_unmap_globals(pt);
+	}
+
+	if (iommu_pt->domain) {
+		trace_kgsl_pagetable_destroy(iommu_pt->ttbr0, pt->name);
+
+		_detach_pt(iommu_pt, ctx);
+
+		iommu_domain_free(iommu_pt->domain);
+	}
+
+	kfree(iommu_pt);
+}
+
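+/*
+ * Set up the virtual address ranges for a pagetable. The secure pagetable
+ * gets the secure carveout, while other pagetables get the SVM and general
+ * GPU ranges appropriate for a 64-bit or 32-bit address layout.
+ */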
+static void setup_64bit_pagetable(struct kgsl_mmu *mmu,
+		struct kgsl_pagetable *pagetable,
+		struct kgsl_iommu_pt *pt)
+{
+	unsigned int secure_global_size = kgsl_global_secure_pt_entry != NULL ?
+					kgsl_global_secure_pt_entry->size : 0;
+	if (mmu->secured && pagetable->name == KGSL_MMU_SECURE_PT) {
+		pt->compat_va_start = KGSL_IOMMU_SECURE_BASE +
+						secure_global_size;
+		pt->compat_va_end = KGSL_IOMMU_SECURE_END;
+		pt->va_start = KGSL_IOMMU_SECURE_BASE + secure_global_size;
+		pt->va_end = KGSL_IOMMU_SECURE_END;
+	} else {
+		pt->compat_va_start = KGSL_IOMMU_SVM_BASE32;
+		pt->compat_va_end = KGSL_IOMMU_SVM_END32;
+		pt->va_start = KGSL_IOMMU_VA_BASE64;
+		pt->va_end = KGSL_IOMMU_VA_END64;
+	}
+
+	if (pagetable->name != KGSL_MMU_GLOBAL_PT &&
+		pagetable->name != KGSL_MMU_SECURE_PT) {
+		if ((BITS_PER_LONG == 32) || is_compat_task()) {
+			pt->svm_start = KGSL_IOMMU_SVM_BASE32;
+			pt->svm_end = KGSL_IOMMU_SVM_END32;
+		} else {
+			pt->svm_start = KGSL_IOMMU_SVM_BASE64;
+			pt->svm_end = KGSL_IOMMU_SVM_END64;
+		}
+	}
+}
+
+static void setup_32bit_pagetable(struct kgsl_mmu *mmu,
+		struct kgsl_pagetable *pagetable,
+		struct kgsl_iommu_pt *pt)
+{
+	unsigned int secure_global_size = kgsl_global_secure_pt_entry != NULL ?
+					kgsl_global_secure_pt_entry->size : 0;
+	if (mmu->secured) {
+		if (pagetable->name == KGSL_MMU_SECURE_PT) {
+			pt->compat_va_start = KGSL_IOMMU_SECURE_BASE +
+						secure_global_size;
+			pt->compat_va_end = KGSL_IOMMU_SECURE_END;
+			pt->va_start = KGSL_IOMMU_SECURE_BASE +
+						secure_global_size;
+			pt->va_end = KGSL_IOMMU_SECURE_END;
+		} else {
+			pt->va_start = KGSL_IOMMU_SVM_BASE32;
+			pt->va_end = KGSL_IOMMU_SECURE_BASE +
+						secure_global_size;
+			pt->compat_va_start = pt->va_start;
+			pt->compat_va_end = pt->va_end;
+		}
+	} else {
+		pt->va_start = KGSL_IOMMU_SVM_BASE32;
+		pt->va_end = KGSL_IOMMU_GLOBAL_MEM_BASE;
+		pt->compat_va_start = pt->va_start;
+		pt->compat_va_end = pt->va_end;
+	}
+
+	if (pagetable->name != KGSL_MMU_GLOBAL_PT &&
+		pagetable->name != KGSL_MMU_SECURE_PT) {
+		pt->svm_start = KGSL_IOMMU_SVM_BASE32;
+		pt->svm_end = KGSL_IOMMU_SVM_END32;
+	}
+}
+
+static struct kgsl_iommu_pt *
+_alloc_pt(struct device *dev, struct kgsl_mmu *mmu, struct kgsl_pagetable *pt)
+{
+	struct kgsl_iommu_pt *iommu_pt;
+	struct bus_type *bus = kgsl_mmu_get_bus(dev);
+
+	if (bus == NULL)
+		return ERR_PTR(-ENODEV);
+
+	iommu_pt = kzalloc(sizeof(struct kgsl_iommu_pt), GFP_KERNEL);
+	if (iommu_pt == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	iommu_pt->domain = iommu_domain_alloc(bus);
+	if (iommu_pt->domain == NULL) {
+		kfree(iommu_pt);
+		return ERR_PTR(-ENODEV);
+	}
+
+	pt->pt_ops = &iommu_pt_ops;
+	pt->priv = iommu_pt;
+	pt->fault_addr = ~0ULL;
+	iommu_pt->rbtree = RB_ROOT;
+
+	if (MMU_FEATURE(mmu, KGSL_MMU_64BIT))
+		setup_64bit_pagetable(mmu, pt, iommu_pt);
+	else
+		setup_32bit_pagetable(mmu, pt, iommu_pt);
+
+	return iommu_pt;
+}
+
+static void _free_pt(struct kgsl_iommu_context *ctx, struct kgsl_pagetable *pt)
+{
+	struct kgsl_iommu_pt *iommu_pt = pt->priv;
+
+	pt->pt_ops = NULL;
+	pt->priv = NULL;
+
+	if (iommu_pt == NULL)
+		return;
+
+	_detach_pt(iommu_pt, ctx);
+
+	if (iommu_pt->domain != NULL)
+		iommu_domain_free(iommu_pt->domain);
+	kfree(iommu_pt);
+}
+
+static int _init_global_pt(struct kgsl_mmu *mmu, struct kgsl_pagetable *pt)
+{
+	int ret = 0;
+	struct kgsl_iommu_pt *iommu_pt = NULL;
+	unsigned int cb_num;
+	struct kgsl_iommu *iommu = _IOMMU_PRIV(mmu);
+	struct kgsl_iommu_context *ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_USER];
+
+	iommu_pt = _alloc_pt(ctx->dev, mmu, pt);
+
+	if (IS_ERR(iommu_pt))
+		return PTR_ERR(iommu_pt);
+
+	if (kgsl_mmu_is_perprocess(mmu)) {
+		ret = iommu_domain_set_attr(iommu_pt->domain,
+				DOMAIN_ATTR_PROCID, &pt->name);
+		if (ret) {
+			KGSL_CORE_ERR("set DOMAIN_ATTR_PROCID failed: %d\n",
+					ret);
+			goto done;
+		}
+	}
+
+	ret = _attach_pt(iommu_pt, ctx);
+	if (ret)
+		goto done;
+
+	iommu_set_fault_handler(iommu_pt->domain,
+				kgsl_iommu_fault_handler, pt);
+
+	ret = iommu_domain_get_attr(iommu_pt->domain,
+				DOMAIN_ATTR_CONTEXT_BANK, &cb_num);
+	if (ret) {
+		KGSL_CORE_ERR("get DOMAIN_ATTR_CONTEXT_BANK failed: %d\n",
+				ret);
+		goto done;
+	}
+
+	ctx->cb_num = cb_num;
+	ctx->regbase = iommu->regbase + KGSL_IOMMU_CB0_OFFSET
+			+ (cb_num << KGSL_IOMMU_CB_SHIFT);
+
+	ret = iommu_domain_get_attr(iommu_pt->domain,
+			DOMAIN_ATTR_TTBR0, &iommu_pt->ttbr0);
+	if (ret) {
+		KGSL_CORE_ERR("get DOMAIN_ATTR_TTBR0 failed: %d\n",
+				ret);
+		goto done;
+	}
+	ret = iommu_domain_get_attr(iommu_pt->domain,
+			DOMAIN_ATTR_CONTEXTIDR, &iommu_pt->contextidr);
+	if (ret) {
+		KGSL_CORE_ERR("get DOMAIN_ATTR_CONTEXTIDR failed: %d\n",
+				ret);
+		goto done;
+	}
+
+	ret = kgsl_iommu_map_globals(pt);
+
+done:
+	if (ret)
+		_free_pt(ctx, pt);
+
+	return ret;
+}
+
+static int _init_secure_pt(struct kgsl_mmu *mmu, struct kgsl_pagetable *pt)
+{
+	int ret = 0;
+	struct kgsl_iommu_pt *iommu_pt = NULL;
+	struct kgsl_iommu *iommu = _IOMMU_PRIV(mmu);
+	struct kgsl_iommu_context *ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_SECURE];
+	int secure_vmid = VMID_CP_PIXEL;
+	unsigned int cb_num;
+
+	if (!mmu->secured)
+		return -EPERM;
+
+	if (!MMU_FEATURE(mmu, KGSL_MMU_HYP_SECURE_ALLOC)) {
+		if (!kgsl_mmu_bus_secured(ctx->dev))
+			return -EPERM;
+	}
+
+	iommu_pt = _alloc_pt(ctx->dev, mmu, pt);
+
+	if (IS_ERR(iommu_pt))
+		return PTR_ERR(iommu_pt);
+
+	ret = iommu_domain_set_attr(iommu_pt->domain,
+				    DOMAIN_ATTR_SECURE_VMID, &secure_vmid);
+	if (ret) {
+		KGSL_CORE_ERR("set DOMAIN_ATTR_SECURE_VMID failed: %d\n", ret);
+		goto done;
+	}
+
+	ret = _attach_pt(iommu_pt, ctx);
+
+	if (MMU_FEATURE(mmu, KGSL_MMU_HYP_SECURE_ALLOC))
+		iommu_set_fault_handler(iommu_pt->domain,
+					kgsl_iommu_fault_handler, pt);
+
+	ret = iommu_domain_get_attr(iommu_pt->domain,
+				DOMAIN_ATTR_CONTEXT_BANK, &cb_num);
+	if (ret) {
+		KGSL_CORE_ERR("get DOMAIN_ATTR_CONTEXT_BANK failed: %d\n",
+				ret);
+		goto done;
+	}
+
+	ctx->cb_num = cb_num;
+	ctx->regbase = iommu->regbase + KGSL_IOMMU_CB0_OFFSET
+			+ (cb_num << KGSL_IOMMU_CB_SHIFT);
+
+	ret = kgsl_map_global_secure_pt_entry(pt);
+
+done:
+	if (ret)
+		_free_pt(ctx, pt);
+	return ret;
+}
+
+static int _init_per_process_pt(struct kgsl_mmu *mmu, struct kgsl_pagetable *pt)
+{
+	int ret = 0;
+	struct kgsl_iommu_pt *iommu_pt = NULL;
+	struct kgsl_iommu *iommu = _IOMMU_PRIV(mmu);
+	struct kgsl_iommu_context *ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_USER];
+	int dynamic = 1;
+	unsigned int cb_num = ctx->cb_num;
+
+	iommu_pt = _alloc_pt(ctx->dev, mmu, pt);
+
+	if (IS_ERR(iommu_pt))
+		return PTR_ERR(iommu_pt);
+
+	ret = iommu_domain_set_attr(iommu_pt->domain,
+				DOMAIN_ATTR_DYNAMIC, &dynamic);
+	if (ret) {
+		KGSL_CORE_ERR("set DOMAIN_ATTR_DYNAMIC failed: %d\n", ret);
+		goto done;
+	}
+	ret = iommu_domain_set_attr(iommu_pt->domain,
+				DOMAIN_ATTR_CONTEXT_BANK, &cb_num);
+	if (ret) {
+		KGSL_CORE_ERR("set DOMAIN_ATTR_CONTEXT_BANK failed: %d\n", ret);
+		goto done;
+	}
+
+	ret = iommu_domain_set_attr(iommu_pt->domain,
+				DOMAIN_ATTR_PROCID, &pt->name);
+	if (ret) {
+		KGSL_CORE_ERR("set DOMAIN_ATTR_PROCID failed: %d\n", ret);
+		goto done;
+	}
+
+	ret = _attach_pt(iommu_pt, ctx);
+	if (ret)
+		goto done;
+
+	/* now read back the attributes needed for self programming */
+	ret = iommu_domain_get_attr(iommu_pt->domain,
+				DOMAIN_ATTR_TTBR0, &iommu_pt->ttbr0);
+	if (ret) {
+		KGSL_CORE_ERR("get DOMAIN_ATTR_TTBR0 failed: %d\n", ret);
+		goto done;
+	}
+
+	ret = iommu_domain_get_attr(iommu_pt->domain,
+				DOMAIN_ATTR_CONTEXTIDR, &iommu_pt->contextidr);
+	if (ret) {
+		KGSL_CORE_ERR("get DOMAIN_ATTR_CONTEXTIDR failed: %d\n", ret);
+		goto done;
+	}
+
+	ret = kgsl_iommu_map_globals(pt);
+
+done:
+	if (ret)
+		_free_pt(ctx, pt);
+
+	return ret;
+}
+
+/* kgsl_iommu_init_pt - Set up an IOMMU pagetable */
+static int kgsl_iommu_init_pt(struct kgsl_mmu *mmu, struct kgsl_pagetable *pt)
+{
+	if (pt == NULL)
+		return -EINVAL;
+
+	switch (pt->name) {
+	case KGSL_MMU_GLOBAL_PT:
+		return _init_global_pt(mmu, pt);
+
+	case KGSL_MMU_SECURE_PT:
+		return _init_secure_pt(mmu, pt);
+
+	default:
+		return _init_per_process_pt(mmu, pt);
+	}
+}
+
+static struct kgsl_pagetable *kgsl_iommu_getpagetable(struct kgsl_mmu *mmu,
+		unsigned long name)
+{
+	struct kgsl_pagetable *pt;
+
+	if (!kgsl_mmu_is_perprocess(mmu) && (name != KGSL_MMU_SECURE_PT)) {
+		name = KGSL_MMU_GLOBAL_PT;
+		if (mmu->defaultpagetable != NULL)
+			return mmu->defaultpagetable;
+	}
+
+	pt = kgsl_get_pagetable(name);
+	if (pt == NULL)
+		pt = kgsl_mmu_createpagetableobject(mmu, name);
+
+	return pt;
+}
+
+/*
+ * kgsl_iommu_get_reg_ahbaddr - Returns the ahb address of the register
+ * @mmu - Pointer to mmu structure
+ * @id - The context ID of the IOMMU ctx
+ * @reg - The register for which address is required
+ *
+ * Return - The address of the register which can be used in a type0 packet
+ */
+static unsigned int kgsl_iommu_get_reg_ahbaddr(struct kgsl_mmu *mmu,
+		int id, unsigned int reg)
+{
+	struct kgsl_iommu *iommu = _IOMMU_PRIV(mmu);
+	struct kgsl_iommu_context *ctx = &iommu->ctx[id];
+
+	return ctx->gpu_offset + kgsl_iommu_reg_list[reg];
+}
+
+static void _detach_context(struct kgsl_iommu_context *ctx)
+{
+	struct kgsl_iommu_pt *iommu_pt;
+
+	if (ctx->default_pt == NULL)
+		return;
+
+	iommu_pt = ctx->default_pt->priv;
+
+	_detach_pt(iommu_pt, ctx);
+
+	ctx->default_pt = NULL;
+}
+
+static void kgsl_iommu_close(struct kgsl_mmu *mmu)
+{
+	struct kgsl_iommu *iommu = _IOMMU_PRIV(mmu);
+	int i;
+
+	for (i = 0; i < KGSL_IOMMU_CONTEXT_MAX; i++)
+		_detach_context(&iommu->ctx[i]);
+
+	kgsl_mmu_putpagetable(mmu->defaultpagetable);
+	mmu->defaultpagetable = NULL;
+
+	kgsl_mmu_putpagetable(mmu->securepagetable);
+	mmu->securepagetable = NULL;
+
+	if (iommu->regbase != NULL)
+		iounmap(iommu->regbase);
+
+	kgsl_sharedmem_free(&kgsl_secure_guard_page_memdesc);
+
+	if (kgsl_guard_page != NULL) {
+		__free_page(kgsl_guard_page);
+		kgsl_guard_page = NULL;
+	}
+
+	if (kgsl_dummy_page != NULL) {
+		__free_page(kgsl_dummy_page);
+		kgsl_dummy_page = NULL;
+	}
+
+	kgsl_iommu_remove_global(mmu, &iommu->setstate);
+	kgsl_sharedmem_free(&iommu->setstate);
+	kgsl_cleanup_qdss_desc(mmu);
+}
+
+static int _setstate_alloc(struct kgsl_device *device,
+		struct kgsl_iommu *iommu)
+{
+	int ret;
+
+	ret = kgsl_sharedmem_alloc_contig(device, &iommu->setstate, PAGE_SIZE);
+
+	if (!ret) {
+		/* Mark the setstate memory as read only */
+		iommu->setstate.flags |= KGSL_MEMFLAGS_GPUREADONLY;
+
+		kgsl_sharedmem_set(device, &iommu->setstate, 0, 0, PAGE_SIZE);
+	}
+
+	return ret;
+}
+
+static int kgsl_iommu_init(struct kgsl_mmu *mmu)
+{
+	struct kgsl_device *device = KGSL_MMU_DEVICE(mmu);
+	struct kgsl_iommu *iommu = _IOMMU_PRIV(mmu);
+	struct kgsl_iommu_context *ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_USER];
+	int status;
+
+	mmu->features |= KGSL_MMU_PAGED;
+
+	if (ctx->name == NULL) {
+		KGSL_CORE_ERR("dt: gfx3d_user context bank not found\n");
+		return -EINVAL;
+	}
+
+	status = _setstate_alloc(device, iommu);
+	if (status)
+		return status;
+
+	/* check requirements for per process pagetables */
+	if (ctx->gpu_offset == UINT_MAX) {
+		KGSL_CORE_ERR("missing qcom,gpu-offset forces global pt\n");
+		mmu->features |= KGSL_MMU_GLOBAL_PAGETABLE;
+	}
+
+	if (iommu->version == 1 && iommu->micro_mmu_ctrl == UINT_MAX) {
+		KGSL_CORE_ERR(
+			"missing qcom,micro-mmu-control forces global pt\n");
+		mmu->features |= KGSL_MMU_GLOBAL_PAGETABLE;
+	}
+
+	/* Check to see if we need to do the IOMMU sync dance */
+	need_iommu_sync = of_property_read_bool(device->pdev->dev.of_node,
+		"qcom,gpu-quirk-iommu-sync");
+
+	iommu->regbase = ioremap(iommu->regstart, iommu->regsize);
+	if (iommu->regbase == NULL) {
+		KGSL_CORE_ERR("Could not map IOMMU registers 0x%lx:0x%x\n",
+			iommu->regstart, iommu->regsize);
+		status = -ENOMEM;
+		goto done;
+	}
+
+	if (addr_entry_cache == NULL) {
+		addr_entry_cache = KMEM_CACHE(kgsl_iommu_addr_entry, 0);
+		if (addr_entry_cache == NULL) {
+			status = -ENOMEM;
+			goto done;
+		}
+	}
+
+	kgsl_iommu_add_global(mmu, &iommu->setstate, "setstate");
+	kgsl_setup_qdss_desc(device);
+
+done:
+	if (status)
+		kgsl_iommu_close(mmu);
+
+	return status;
+}
+
+static int _setup_user_context(struct kgsl_mmu *mmu)
+{
+	int ret = 0;
+	struct kgsl_iommu *iommu = _IOMMU_PRIV(mmu);
+	struct kgsl_iommu_context *ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_USER];
+	struct kgsl_device *device = KGSL_MMU_DEVICE(mmu);
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+	struct kgsl_iommu_pt *iommu_pt = NULL;
+	unsigned int  sctlr_val;
+
+	if (mmu->defaultpagetable == NULL) {
+		mmu->defaultpagetable = kgsl_mmu_getpagetable(mmu,
+				KGSL_MMU_GLOBAL_PT);
+		/* if we don't have a default pagetable, nothing will work */
+		if (IS_ERR(mmu->defaultpagetable)) {
+			ret = PTR_ERR(mmu->defaultpagetable);
+			mmu->defaultpagetable = NULL;
+			return ret;
+		}
+	}
+
+	iommu_pt = mmu->defaultpagetable->priv;
+	if (iommu_pt == NULL)
+		return -ENODEV;
+
+	ret = _attach_pt(iommu_pt, ctx);
+	if (ret)
+		return ret;
+
+	ctx->default_pt = mmu->defaultpagetable;
+
+	kgsl_iommu_enable_clk(mmu);
+
+	sctlr_val = KGSL_IOMMU_GET_CTX_REG(ctx, SCTLR);
+
+	/*
+	 * If pagefault policy is GPUHALT_ENABLE,
+	 * 1) Program CFCFG to 1 to enable STALL mode
+	 * 2) Program HUPCF to 0 (Stall or terminate subsequent
+	 *    transactions in the presence of an outstanding fault)
+	 * else
+	 * 1) Program CFCFG to 0 to disable STALL mode (0=Terminate)
+	 * 2) Program HUPCF to 1 (Process subsequent transactions
+	 *    independently of any outstanding fault)
+	 */
+
+	if (test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE,
+				&adreno_dev->ft_pf_policy)) {
+		sctlr_val |= (0x1 << KGSL_IOMMU_SCTLR_CFCFG_SHIFT);
+		sctlr_val &= ~(0x1 << KGSL_IOMMU_SCTLR_HUPCF_SHIFT);
+	} else {
+		sctlr_val &= ~(0x1 << KGSL_IOMMU_SCTLR_CFCFG_SHIFT);
+		sctlr_val |= (0x1 << KGSL_IOMMU_SCTLR_HUPCF_SHIFT);
+	}
+	KGSL_IOMMU_SET_CTX_REG(ctx, SCTLR, sctlr_val);
+	kgsl_iommu_disable_clk(mmu);
+
+	return 0;
+}
+
+static int _setup_secure_context(struct kgsl_mmu *mmu)
+{
+	int ret;
+	struct kgsl_iommu *iommu = _IOMMU_PRIV(mmu);
+	struct kgsl_iommu_context *ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_SECURE];
+	unsigned int cb_num;
+
+	struct kgsl_iommu_pt *iommu_pt;
+
+	if (ctx->dev == NULL || !mmu->secured)
+		return 0;
+
+	if (mmu->securepagetable == NULL) {
+		mmu->securepagetable = kgsl_mmu_getpagetable(mmu,
+						KGSL_MMU_SECURE_PT);
+		if (IS_ERR(mmu->securepagetable)) {
+			ret = PTR_ERR(mmu->securepagetable);
+			mmu->securepagetable = NULL;
+			return ret;
+		} else if (mmu->securepagetable == NULL) {
+			return -ENOMEM;
+		}
+	}
+	iommu_pt = mmu->securepagetable->priv;
+
+	ret = _attach_pt(iommu_pt, ctx);
+	if (ret)
+		goto done;
+
+	ctx->default_pt = mmu->securepagetable;
+
+	ret = iommu_domain_get_attr(iommu_pt->domain, DOMAIN_ATTR_CONTEXT_BANK,
+					&cb_num);
+	if (ret) {
+		KGSL_CORE_ERR("get CONTEXT_BANK attr, err %d\n", ret);
+		goto done;
+	}
+	ctx->cb_num = cb_num;
+done:
+	if (ret)
+		_detach_context(ctx);
+	return ret;
+}
+
+static int kgsl_iommu_set_pt(struct kgsl_mmu *mmu, struct kgsl_pagetable *pt);
+
+static int kgsl_iommu_start(struct kgsl_mmu *mmu)
+{
+	int status;
+	struct kgsl_iommu *iommu = _IOMMU_PRIV(mmu);
+
+	status = _setup_user_context(mmu);
+	if (status)
+		return status;
+
+	status = _setup_secure_context(mmu);
+	if (status) {
+		_detach_context(&iommu->ctx[KGSL_IOMMU_CONTEXT_USER]);
+		return status;
+	}
+
+	/* Make sure the hardware is programmed to the default pagetable */
+	return kgsl_iommu_set_pt(mmu, mmu->defaultpagetable);
+}
+
+static int
+kgsl_iommu_unmap_offset(struct kgsl_pagetable *pt,
+		struct kgsl_memdesc *memdesc, uint64_t addr,
+		uint64_t offset, uint64_t size)
+{
+	if (size == 0 || (size + offset) > kgsl_memdesc_footprint(memdesc))
+		return -EINVAL;
+	/*
+	 * All GPU addresses as assigned are page aligned, but some
+	 * functions perturb the gpuaddr with an offset, so apply the
+	 * mask here to make sure we have the right address.
+	 */
+
+	addr = PAGE_ALIGN(addr);
+	if (addr == 0)
+		return -EINVAL;
+
+	return _iommu_unmap_sync_pc(pt, memdesc, addr + offset, size);
+}
+
+static int
+kgsl_iommu_unmap(struct kgsl_pagetable *pt, struct kgsl_memdesc *memdesc)
+{
+	if (memdesc->size == 0 || memdesc->gpuaddr == 0)
+		return -EINVAL;
+
+	return kgsl_iommu_unmap_offset(pt, memdesc, memdesc->gpuaddr, 0,
+			kgsl_memdesc_footprint(memdesc));
+}
+
+/**
+ * _iommu_map_guard_page - Map iommu guard page
+ * @pt - Pointer to kgsl pagetable structure
+ * @memdesc - memdesc to add guard page
+ * @gpuaddr - GPU addr of guard page
+ * @protflags - flags for mapping
+ *
+ * Return 0 on success, error on map fail
+ */
+static int _iommu_map_guard_page(struct kgsl_pagetable *pt,
+				   struct kgsl_memdesc *memdesc,
+				   uint64_t gpuaddr,
+				   unsigned int protflags)
+{
+	phys_addr_t physaddr;
+
+	if (!kgsl_memdesc_has_guard_page(memdesc))
+		return 0;
+
+	/*
+	 * Allocate a guard page for secure buffers.
+	 * This has to be done after we attach an SMMU pagetable.
+	 * Allocate the guard page when the first secure buffer is
+	 * mapped to save 1MB of memory if CPZ is not used.
+	 */
+	if (kgsl_memdesc_is_secured(memdesc)) {
+		struct scatterlist *sg;
+		unsigned int sgp_size = pt->mmu->secure_align_mask + 1;
+
+		if (!kgsl_secure_guard_page_memdesc.sgt) {
+			if (kgsl_allocate_user(KGSL_MMU_DEVICE(pt->mmu),
+					&kgsl_secure_guard_page_memdesc,
+					sgp_size, KGSL_MEMFLAGS_SECURE)) {
+				KGSL_CORE_ERR(
+					"Secure guard page alloc failed\n");
+				return -ENOMEM;
+			}
+		}
+
+		sg = kgsl_secure_guard_page_memdesc.sgt->sgl;
+		physaddr = page_to_phys(sg_page(sg));
+	} else {
+		if (kgsl_guard_page == NULL) {
+			kgsl_guard_page = alloc_page(GFP_KERNEL | __GFP_ZERO |
+					__GFP_NORETRY | __GFP_HIGHMEM);
+			if (kgsl_guard_page == NULL)
+				return -ENOMEM;
+		}
+
+		physaddr = page_to_phys(kgsl_guard_page);
+	}
+
+	return _iommu_map_sync_pc(pt, memdesc, gpuaddr, physaddr,
+			kgsl_memdesc_guard_page_size(memdesc),
+			protflags & ~IOMMU_WRITE);
+}
+
+static unsigned int _get_protection_flags(struct kgsl_memdesc *memdesc)
+{
+	unsigned int flags = IOMMU_READ | IOMMU_WRITE | IOMMU_NOEXEC;
+
+	if (memdesc->flags & KGSL_MEMFLAGS_GPUREADONLY)
+		flags &= ~IOMMU_WRITE;
+
+	if (memdesc->priv & KGSL_MEMDESC_PRIVILEGED)
+		flags |= IOMMU_PRIV;
+
+	return flags;
+}
+
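+/*
+ * Map an entire memory descriptor into the pagetable at its assigned GPU
+ * address, followed by the read-only guard page if the descriptor has one.
+ */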
+static int
+kgsl_iommu_map(struct kgsl_pagetable *pt,
+			struct kgsl_memdesc *memdesc)
+{
+	int ret;
+	uint64_t addr = memdesc->gpuaddr;
+	uint64_t size = memdesc->size;
+	unsigned int flags = _get_protection_flags(memdesc);
+	struct sg_table *sgt = NULL;
+
+	/*
+	 * For paged memory allocated through kgsl, memdesc->pages is not NULL.
+	 * Allocate sgt here just for its map operation. Contiguous memory
+	 * already has its sgt, so no need to allocate it here.
+	 */
+	if (memdesc->pages != NULL)
+		sgt = kgsl_alloc_sgt_from_pages(memdesc);
+	else
+		sgt = memdesc->sgt;
+
+	if (IS_ERR(sgt))
+		return PTR_ERR(sgt);
+
+	ret = _iommu_map_sg_sync_pc(pt, addr, memdesc, sgt->sgl,
+				sgt->nents, flags);
+	if (ret)
+		goto done;
+
+	ret = _iommu_map_guard_page(pt, memdesc, addr + size, flags);
+	if (ret)
+		_iommu_unmap_sync_pc(pt, memdesc, addr, size);
+
+done:
+	if (memdesc->pages != NULL)
+		kgsl_free_sgt(sgt);
+
+	return ret;
+}
+
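+/*
+ * Back a range of a sparse allocation with the shared dummy page, mapped
+ * read-only and non-executable, so unbound virtual ranges stay valid.
+ */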
+static int kgsl_iommu_sparse_dummy_map(struct kgsl_pagetable *pt,
+		struct kgsl_memdesc *memdesc, uint64_t offset, uint64_t size)
+{
+	int ret = 0, i;
+	struct page **pages = NULL;
+	struct sg_table sgt;
+	int count = size >> PAGE_SHIFT;
+
+	/* verify the offset is within our range */
+	if (size + offset > memdesc->size)
+		return -EINVAL;
+
+	if (kgsl_dummy_page == NULL) {
+		kgsl_dummy_page = alloc_page(GFP_KERNEL | __GFP_ZERO |
+				__GFP_HIGHMEM);
+		if (kgsl_dummy_page == NULL)
+			return -ENOMEM;
+	}
+
+	pages = kcalloc(count, sizeof(struct page *), GFP_KERNEL);
+	if (pages == NULL)
+		return -ENOMEM;
+
+	for (i = 0; i < count; i++)
+		pages[i] = kgsl_dummy_page;
+
+	ret = sg_alloc_table_from_pages(&sgt, pages, count,
+			0, size, GFP_KERNEL);
+	if (ret == 0) {
+		ret = _iommu_map_sg_sync_pc(pt, memdesc->gpuaddr + offset,
+				memdesc, sgt.sgl, sgt.nents,
+				IOMMU_READ | IOMMU_NOEXEC);
+		sg_free_table(&sgt);
+	}
+
+	kfree(pages);
+
+	return ret;
+}
+
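+/*
+ * Map 'size' bytes of GPU virtual address space to a single physical page
+ * (or a multi-page chunk when a larger page size is in use) found at
+ * 'physoffset' within the memory descriptor. Used for sparse bindings that
+ * map multiple virtual ranges to the same physical backing.
+ */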
+static int _map_to_one_page(struct kgsl_pagetable *pt, uint64_t addr,
+		struct kgsl_memdesc *memdesc, uint64_t physoffset,
+		uint64_t size, unsigned int map_flags)
+{
+	int ret = 0, i;
+	int pg_sz = kgsl_memdesc_get_pagesize(memdesc);
+	int count = size >> PAGE_SHIFT;
+	struct page *page = NULL;
+	struct page **pages = NULL;
+	struct sg_page_iter sg_iter;
+	struct sg_table sgt;
+
+	/* Find our physaddr offset addr */
+	if (memdesc->pages != NULL)
+		page = memdesc->pages[physoffset >> PAGE_SHIFT];
+	else {
+		for_each_sg_page(memdesc->sgt->sgl, &sg_iter,
+				memdesc->sgt->nents, physoffset >> PAGE_SHIFT) {
+			page = sg_page_iter_page(&sg_iter);
+			break;
+		}
+	}
+
+	if (page == NULL)
+		return -EINVAL;
+
+	pages = kcalloc(count, sizeof(struct page *), GFP_KERNEL);
+	if (pages == NULL)
+		return -ENOMEM;
+
+	for (i = 0; i < count; i++) {
+		if (pg_sz != PAGE_SIZE) {
+			struct page *tmp_page = page;
+			int j;
+
+			for (j = 0; j < 16; j++, tmp_page += PAGE_SIZE)
+				pages[i++] = tmp_page;
+		} else
+			pages[i] = page;
+	}
+
+	ret = sg_alloc_table_from_pages(&sgt, pages, count,
+			0, size, GFP_KERNEL);
+	if (ret == 0) {
+		ret = _iommu_map_sg_sync_pc(pt, addr, memdesc, sgt.sgl,
+				sgt.nents, map_flags);
+		sg_free_table(&sgt);
+	}
+
+	kfree(pages);
+
+	return ret;
+}
+
+static int kgsl_iommu_map_offset(struct kgsl_pagetable *pt,
+		uint64_t virtaddr, uint64_t virtoffset,
+		struct kgsl_memdesc *memdesc, uint64_t physoffset,
+		uint64_t size, uint64_t feature_flag)
+{
+	int pg_sz;
+	unsigned int protflags = _get_protection_flags(memdesc);
+	int ret;
+	struct sg_table *sgt = NULL;
+
+	pg_sz = kgsl_memdesc_get_pagesize(memdesc);
+	if (!IS_ALIGNED(virtaddr | virtoffset | physoffset | size, pg_sz))
+		return -EINVAL;
+
+	if (size == 0)
+		return -EINVAL;
+
+	if (!(feature_flag & KGSL_SPARSE_BIND_MULTIPLE_TO_PHYS) &&
+			size + physoffset > kgsl_memdesc_footprint(memdesc))
+		return -EINVAL;
+
+	/*
+	 * For paged memory allocated through kgsl, memdesc->pages is not NULL.
+	 * Allocate sgt here just for its map operation. Contiguous memory
+	 * already has its sgt, so no need to allocate it here.
+	 */
+	if (memdesc->pages != NULL)
+		sgt = kgsl_alloc_sgt_from_pages(memdesc);
+	else
+		sgt = memdesc->sgt;
+
+	if (IS_ERR(sgt))
+		return PTR_ERR(sgt);
+
+	if (feature_flag & KGSL_SPARSE_BIND_MULTIPLE_TO_PHYS)
+		ret = _map_to_one_page(pt, virtaddr + virtoffset,
+				memdesc, physoffset, size, protflags);
+	else
+		ret = _iommu_map_sg_offset_sync_pc(pt, virtaddr + virtoffset,
+				memdesc, sgt->sgl, sgt->nents,
+				physoffset, size, protflags);
+
+	if (memdesc->pages != NULL)
+		kgsl_free_sgt(sgt);
+
+	return ret;
+}
+
+/* This function must be called with context bank attached */
+static void kgsl_iommu_clear_fsr(struct kgsl_mmu *mmu)
+{
+	struct kgsl_iommu *iommu = _IOMMU_PRIV(mmu);
+	struct kgsl_iommu_context  *ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_USER];
+	unsigned int sctlr_val;
+
+	if (ctx->default_pt != NULL) {
+		kgsl_iommu_enable_clk(mmu);
+		KGSL_IOMMU_SET_CTX_REG(ctx, FSR, 0xffffffff);
+		/*
+		 * Re-enable context fault interrupts after clearing
+		 * FSR to prevent the interrupt from firing repeatedly
+		 */
+		sctlr_val = KGSL_IOMMU_GET_CTX_REG(ctx, SCTLR);
+		sctlr_val |= (0x1 << KGSL_IOMMU_SCTLR_CFIE_SHIFT);
+		KGSL_IOMMU_SET_CTX_REG(ctx, SCTLR, sctlr_val);
+		/*
+		 * Make sure the above register writes
+		 * are not reordered across the barrier
+		 * as we use writel_relaxed to write them
+		 */
+		wmb();
+		kgsl_iommu_disable_clk(mmu);
+	}
+}
+
+static void kgsl_iommu_pagefault_resume(struct kgsl_mmu *mmu)
+{
+	struct kgsl_iommu *iommu = _IOMMU_PRIV(mmu);
+	struct kgsl_iommu_context *ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_USER];
+
+	if (ctx->default_pt != NULL && ctx->fault) {
+		/*
+		 * Write 1 to RESUME.TnR to terminate the
+		 * stalled transaction.
+		 */
+		KGSL_IOMMU_SET_CTX_REG(ctx, RESUME, 1);
+		/*
+		 * Make sure the above register writes
+		 * are not reordered across the barrier
+		 * as we use writel_relaxed to write them
+		 */
+		wmb();
+		ctx->fault = 0;
+	}
+}
+
+static void kgsl_iommu_stop(struct kgsl_mmu *mmu)
+{
+	struct kgsl_iommu *iommu = _IOMMU_PRIV(mmu);
+	int i;
+
+	/*
+	 * If the iommu supports retention, we don't need
+	 * to detach when stopping.
+	 */
+	if (!MMU_FEATURE(mmu, KGSL_MMU_RETENTION)) {
+		for (i = 0; i < KGSL_IOMMU_CONTEXT_MAX; i++)
+			_detach_context(&iommu->ctx[i]);
+	}
+}
+
+static u64
+kgsl_iommu_get_current_ttbr0(struct kgsl_mmu *mmu)
+{
+	u64 val;
+	struct kgsl_iommu *iommu = _IOMMU_PRIV(mmu);
+	/*
+	 * We cannot enable or disable the clocks in interrupt context, this
+	 * function is called from interrupt context if there is an axi error
+	 */
+	if (in_interrupt())
+		return 0;
+
+	kgsl_iommu_enable_clk(mmu);
+	val = KGSL_IOMMU_GET_CTX_REG_Q(&iommu->ctx[KGSL_IOMMU_CONTEXT_USER],
+					TTBR0);
+	kgsl_iommu_disable_clk(mmu);
+	return val;
+}
+
+/*
+ * kgsl_iommu_set_pt - Change the IOMMU pagetable of the primary context bank
+ * @mmu - Pointer to mmu structure
+ * @pt - Pagetable to switch to
+ *
+ * Set the new pagetable for the IOMMU by doing direct register writes
+ * to the IOMMU registers through the cpu
+ *
+ * Return - 0 on success
+ */
+static int kgsl_iommu_set_pt(struct kgsl_mmu *mmu, struct kgsl_pagetable *pt)
+{
+	struct kgsl_iommu *iommu = _IOMMU_PRIV(mmu);
+	struct kgsl_iommu_context *ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_USER];
+	uint64_t ttbr0, temp;
+	unsigned int contextidr;
+	unsigned long wait_for_flush;
+
+	if ((pt != mmu->defaultpagetable) && !kgsl_mmu_is_perprocess(mmu))
+		return 0;
+
+	kgsl_iommu_enable_clk(mmu);
+
+	ttbr0 = kgsl_mmu_pagetable_get_ttbr0(pt);
+	contextidr = kgsl_mmu_pagetable_get_contextidr(pt);
+
+	KGSL_IOMMU_SET_CTX_REG_Q(ctx, TTBR0, ttbr0);
+	KGSL_IOMMU_SET_CTX_REG(ctx, CONTEXTIDR, contextidr);
+
+	/* memory barrier before reading TTBR0 register */
+	mb();
+	temp = KGSL_IOMMU_GET_CTX_REG_Q(ctx, TTBR0);
+
+	KGSL_IOMMU_SET_CTX_REG(ctx, TLBIALL, 1);
+	/* make sure the TLBIALL write completes before we wait */
+	mb();
+	/*
+	 * Wait for the flush to complete by polling the flush status bit of
+	 * the TLBSTATUS register for no more than 2 seconds. After that give
+	 * up; at that point the SMMU h/w may be stuck and will eventually
+	 * cause the GPU to hang or bring the system down.
+	 */
+	wait_for_flush = jiffies + msecs_to_jiffies(2000);
+	KGSL_IOMMU_SET_CTX_REG(ctx, TLBSYNC, 0);
+	while (KGSL_IOMMU_GET_CTX_REG(ctx, TLBSTATUS) &
+		(KGSL_IOMMU_CTX_TLBSTATUS_SACTIVE)) {
+		if (time_after(jiffies, wait_for_flush)) {
+			KGSL_DRV_WARN(KGSL_MMU_DEVICE(mmu),
+			"Wait limit reached for IOMMU tlb flush\n");
+			break;
+		}
+		cpu_relax();
+	}
+
+	kgsl_iommu_disable_clk(mmu);
+	return 0;
+}
+
+/*
+ * kgsl_iommu_set_pf_policy() - Set the pagefault policy for IOMMU
+ * @mmu: Pointer to mmu structure
+ * @pf_policy: The pagefault policy to set
+ *
+ * Check if the new policy indicated by pf_policy is the same as the current
+ * policy; if it is, return early, otherwise program the new policy
+ */
+static int kgsl_iommu_set_pf_policy(struct kgsl_mmu *mmu,
+				unsigned long pf_policy)
+{
+	struct kgsl_iommu *iommu = _IOMMU_PRIV(mmu);
+	struct kgsl_iommu_context *ctx = &iommu->ctx[KGSL_IOMMU_CONTEXT_USER];
+	struct kgsl_device *device = KGSL_MMU_DEVICE(mmu);
+	struct adreno_device *adreno_dev = ADRENO_DEVICE(device);
+
+	if ((adreno_dev->ft_pf_policy &
+		BIT(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE)) ==
+		(pf_policy & BIT(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE)))
+		return 0;
+
+	/* If not attached, policy will be updated during the next attach */
+	if (ctx->default_pt != NULL) {
+		unsigned int sctlr_val;
+
+		kgsl_iommu_enable_clk(mmu);
+
+		sctlr_val = KGSL_IOMMU_GET_CTX_REG(ctx, SCTLR);
+
+		if (test_bit(KGSL_FT_PAGEFAULT_GPUHALT_ENABLE, &pf_policy)) {
+			sctlr_val |= (0x1 << KGSL_IOMMU_SCTLR_CFCFG_SHIFT);
+			sctlr_val &= ~(0x1 << KGSL_IOMMU_SCTLR_HUPCF_SHIFT);
+		} else {
+			sctlr_val &= ~(0x1 << KGSL_IOMMU_SCTLR_CFCFG_SHIFT);
+			sctlr_val |= (0x1 << KGSL_IOMMU_SCTLR_HUPCF_SHIFT);
+		}
+
+		KGSL_IOMMU_SET_CTX_REG(ctx, SCTLR, sctlr_val);
+
+		kgsl_iommu_disable_clk(mmu);
+	}
+
+	return 0;
+}
+
+static struct kgsl_protected_registers *
+kgsl_iommu_get_prot_regs(struct kgsl_mmu *mmu)
+{
+	struct kgsl_iommu *iommu = _IOMMU_PRIV(mmu);
+
+	return &iommu->protect;
+}
+
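+/*
+ * Each pagetable tracks its GPU virtual address allocations in a red-black
+ * tree keyed by base address. The helpers below look up, remove, and insert
+ * entries in that tree.
+ */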
+static struct kgsl_iommu_addr_entry *_find_gpuaddr(
+		struct kgsl_pagetable *pagetable, uint64_t gpuaddr)
+{
+	struct kgsl_iommu_pt *pt = pagetable->priv;
+	struct rb_node *node = pt->rbtree.rb_node;
+
+	while (node != NULL) {
+		struct kgsl_iommu_addr_entry *entry = rb_entry(node,
+			struct kgsl_iommu_addr_entry, node);
+
+		if (gpuaddr < entry->base)
+			node = node->rb_left;
+		else if (gpuaddr > entry->base)
+			node = node->rb_right;
+		else
+			return entry;
+	}
+
+	return NULL;
+}
+
+static int _remove_gpuaddr(struct kgsl_pagetable *pagetable,
+		uint64_t gpuaddr)
+{
+	struct kgsl_iommu_pt *pt = pagetable->priv;
+	struct kgsl_iommu_addr_entry *entry;
+
+	entry = _find_gpuaddr(pagetable, gpuaddr);
+
+	if (entry != NULL) {
+		rb_erase(&entry->node, &pt->rbtree);
+		kmem_cache_free(addr_entry_cache, entry);
+		return 0;
+	}
+
+	WARN(1, "Couldn't remove gpuaddr: 0x%llx\n", gpuaddr);
+	return -ENOMEM;
+}
+
+static int _insert_gpuaddr(struct kgsl_pagetable *pagetable,
+		uint64_t gpuaddr, uint64_t size)
+{
+	struct kgsl_iommu_pt *pt = pagetable->priv;
+	struct rb_node **node, *parent = NULL;
+	struct kgsl_iommu_addr_entry *new =
+		kmem_cache_alloc(addr_entry_cache, GFP_ATOMIC);
+
+	if (new == NULL)
+		return -ENOMEM;
+
+	new->base = gpuaddr;
+	new->size = size;
+
+	node = &pt->rbtree.rb_node;
+
+	while (*node != NULL) {
+		struct kgsl_iommu_addr_entry *this;
+
+		parent = *node;
+		this = rb_entry(parent, struct kgsl_iommu_addr_entry, node);
+
+		if (new->base < this->base)
+			node = &parent->rb_left;
+		else if (new->base > this->base)
+			node = &parent->rb_right;
+		else {
+			/* Duplicate entry */
+			WARN(1, "duplicate gpuaddr: 0x%llx\n", gpuaddr);
+			return -EEXIST;
+		}
+	}
+
+	rb_link_node(&new->node, parent, node);
+	rb_insert_color(&new->node, &pt->rbtree);
+
+	return 0;
+}
+
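+/*
+ * Walk the address tree from the bottom of the range upwards and return the
+ * first aligned gap between existing entries that is large enough to hold
+ * 'size' bytes, or -ENOMEM if no such gap exists.
+ */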
+static uint64_t _get_unmapped_area(struct kgsl_pagetable *pagetable,
+		uint64_t bottom, uint64_t top, uint64_t size,
+		uint64_t align)
+{
+	struct kgsl_iommu_pt *pt = pagetable->priv;
+	struct rb_node *node = rb_first(&pt->rbtree);
+	uint64_t start;
+
+	bottom = ALIGN(bottom, align);
+	start = bottom;
+
+	while (node != NULL) {
+		uint64_t gap;
+		struct kgsl_iommu_addr_entry *entry = rb_entry(node,
+			struct kgsl_iommu_addr_entry, node);
+
+		/*
+		 * Skip any entries that are outside of the range, but make sure
+		 * to account for some that might straddle the lower bound
+		 */
+		if (entry->base < bottom) {
+			if (entry->base + entry->size > bottom)
+				start = ALIGN(entry->base + entry->size, align);
+			node = rb_next(node);
+			continue;
+		}
+
+		/* Stop if we went over the top */
+		if (entry->base >= top)
+			break;
+
+		/* Make sure there is a gap to consider */
+		if (start < entry->base) {
+			gap = entry->base - start;
+
+			if (gap >= size)
+				return start;
+		}
+
+		/* Stop if there is no more room in the region */
+		if (entry->base + entry->size >= top)
+			return (uint64_t) -ENOMEM;
+
+		/* Start the next cycle at the end of the current entry */
+		start = ALIGN(entry->base + entry->size, align);
+		node = rb_next(node);
+	}
+
+	if (start + size <= top)
+		return start;
+
+	return (uint64_t) -ENOMEM;
+}
+
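+/*
+ * Like _get_unmapped_area(), but search from the top of the range downwards
+ * and return the highest aligned gap that can hold 'size' bytes. Used when
+ * finding SVM regions.
+ */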
+static uint64_t _get_unmapped_area_topdown(struct kgsl_pagetable *pagetable,
+		uint64_t bottom, uint64_t top, uint64_t size,
+		uint64_t align)
+{
+	struct kgsl_iommu_pt *pt = pagetable->priv;
+	struct rb_node *node = rb_last(&pt->rbtree);
+	uint64_t end = top;
+	uint64_t mask = ~(align - 1);
+	struct kgsl_iommu_addr_entry *entry;
+
+	/* Make sure that the bottom is correctly aligned */
+	bottom = ALIGN(bottom, align);
+
+	/* Make sure the requested size will fit in the range */
+	if (size > (top - bottom))
+		return -ENOMEM;
+
+	/* Walk back through the list to find the highest entry in the range */
+	for (node = rb_last(&pt->rbtree); node != NULL; node = rb_prev(node)) {
+		entry = rb_entry(node, struct kgsl_iommu_addr_entry, node);
+		if (entry->base < top)
+			break;
+	}
+
+	while (node != NULL) {
+		uint64_t offset;
+
+		entry = rb_entry(node, struct kgsl_iommu_addr_entry, node);
+
+		/* If the entire entry is below the range the search is over */
+		if ((entry->base + entry->size) < bottom)
+			break;
+
+		/* Get the top of the entry properly aligned */
+		offset = ALIGN(entry->base + entry->size, align);
+
+		/*
+		 * Try to allocate the memory from the top of the gap,
+		 * making sure that it fits between the top of this entry and
+		 * the bottom of the previous one
+		 */
+
+		if ((end > size) && (offset < end)) {
+			uint64_t chunk = (end - size) & mask;
+
+			if (chunk >= offset)
+				return chunk;
+		}
+
+		/*
+		 * If we get here and the current entry is outside of the range
+		 * then we are officially out of room
+		 */
+
+		if (entry->base < bottom)
+			return (uint64_t) -ENOMEM;
+
+		/* Set the top of the gap to the current entry->base */
+		end = entry->base;
+
+		/* And move on to the next lower entry */
+		node = rb_prev(node);
+	}
+
+	/* If we get here then there are no more entries in the region */
+	if ((end > size) && (((end - size) & mask) >= bottom))
+		return (end - size) & mask;
+
+	return (uint64_t) -ENOMEM;
+}
+
+static uint64_t kgsl_iommu_find_svm_region(struct kgsl_pagetable *pagetable,
+		uint64_t start, uint64_t end, uint64_t size,
+		uint64_t alignment)
+{
+	uint64_t addr;
+
+	/* Avoid black holes */
+	if (WARN(end <= start, "Bad search range: 0x%llx-0x%llx", start, end))
+		return (uint64_t) -EINVAL;
+
+	spin_lock(&pagetable->lock);
+	addr = _get_unmapped_area_topdown(pagetable,
+			start, end, size, alignment);
+	spin_unlock(&pagetable->lock);
+	return addr;
+}
+
+static int kgsl_iommu_set_svm_region(struct kgsl_pagetable *pagetable,
+		uint64_t gpuaddr, uint64_t size)
+{
+	int ret = -ENOMEM;
+	struct kgsl_iommu_pt *pt = pagetable->priv;
+	struct rb_node *node;
+
+	/* Make sure the requested address doesn't fall in the global range */
+	if (ADDR_IN_GLOBAL(gpuaddr) || ADDR_IN_GLOBAL(gpuaddr + size))
+		return -ENOMEM;
+
+	spin_lock(&pagetable->lock);
+	node = pt->rbtree.rb_node;
+
+	while (node != NULL) {
+		uint64_t start, end;
+		struct kgsl_iommu_addr_entry *entry = rb_entry(node,
+			struct kgsl_iommu_addr_entry, node);
+
+		start = entry->base;
+		end = entry->base + entry->size;
+
+		if (gpuaddr  + size <= start)
+			node = node->rb_left;
+		else if (end <= gpuaddr)
+			node = node->rb_right;
+		else
+			goto out;
+	}
+
+	ret = _insert_gpuaddr(pagetable, gpuaddr, size);
+out:
+	spin_unlock(&pagetable->lock);
+	return ret;
+}
+
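+/*
+ * Assign a GPU virtual address to a memory descriptor by finding a free gap
+ * in the pagetable's address range and inserting it into the address tree.
+ */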
+static int kgsl_iommu_get_gpuaddr(struct kgsl_pagetable *pagetable,
+		struct kgsl_memdesc *memdesc)
+{
+	struct kgsl_iommu_pt *pt = pagetable->priv;
+	int ret = 0;
+	uint64_t addr, start, end, size;
+	unsigned int align;
+
+	if (WARN_ON(kgsl_memdesc_use_cpu_map(memdesc)))
+		return -EINVAL;
+
+	if (memdesc->flags & KGSL_MEMFLAGS_SECURE &&
+			pagetable->name != KGSL_MMU_SECURE_PT)
+		return -EINVAL;
+
+	size = kgsl_memdesc_footprint(memdesc);
+
+	align = 1 << kgsl_memdesc_get_align(memdesc);
+
+	if (memdesc->flags & KGSL_MEMFLAGS_FORCE_32BIT) {
+		start = pt->compat_va_start;
+		end = pt->compat_va_end;
+	} else {
+		start = pt->va_start;
+		end = pt->va_end;
+	}
+
+	spin_lock(&pagetable->lock);
+
+	addr = _get_unmapped_area(pagetable, start, end, size, align);
+
+	if (addr == (uint64_t) -ENOMEM) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = _insert_gpuaddr(pagetable, addr, size);
+	if (ret == 0) {
+		memdesc->gpuaddr = addr;
+		memdesc->pagetable = pagetable;
+	}
+
+out:
+	spin_unlock(&pagetable->lock);
+	return ret;
+}
+
+static void kgsl_iommu_put_gpuaddr(struct kgsl_memdesc *memdesc)
+{
+	if (memdesc->pagetable == NULL)
+		return;
+
+	spin_lock(&memdesc->pagetable->lock);
+
+	_remove_gpuaddr(memdesc->pagetable, memdesc->gpuaddr);
+
+	spin_unlock(&memdesc->pagetable->lock);
+}
+
+static int kgsl_iommu_svm_range(struct kgsl_pagetable *pagetable,
+		uint64_t *lo, uint64_t *hi, uint64_t memflags)
+{
+	struct kgsl_iommu_pt *pt = pagetable->priv;
+	bool gpu_compat = (memflags & KGSL_MEMFLAGS_FORCE_32BIT) != 0;
+
+	if (lo != NULL)
+		*lo = gpu_compat ? pt->compat_va_start : pt->svm_start;
+	if (hi != NULL)
+		*hi = gpu_compat ? pt->compat_va_end : pt->svm_end;
+
+	return 0;
+}
+
+static bool kgsl_iommu_addr_in_range(struct kgsl_pagetable *pagetable,
+		uint64_t gpuaddr)
+{
+	struct kgsl_iommu_pt *pt = pagetable->priv;
+
+	if (gpuaddr == 0)
+		return false;
+
+	if (gpuaddr >= pt->va_start && gpuaddr < pt->va_end)
+		return true;
+
+	if (gpuaddr >= pt->compat_va_start && gpuaddr < pt->compat_va_end)
+		return true;
+
+	if (gpuaddr >= pt->svm_start && gpuaddr < pt->svm_end)
+		return true;
+
+	return false;
+}
+
+static const struct {
+	int id;
+	char *name;
+} kgsl_iommu_cbs[] = {
+	{ KGSL_IOMMU_CONTEXT_USER, "gfx3d_user", },
+	{ KGSL_IOMMU_CONTEXT_SECURE, "gfx3d_secure" },
+};
+
+static int _kgsl_iommu_cb_probe(struct kgsl_device *device,
+		struct kgsl_iommu *iommu, struct device_node *node)
+{
+	struct platform_device *pdev = of_find_device_by_node(node);
+	struct kgsl_iommu_context *ctx = NULL;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(kgsl_iommu_cbs); i++) {
+		if (!strcmp(node->name, kgsl_iommu_cbs[i].name)) {
+			int id = kgsl_iommu_cbs[i].id;
+
+			ctx = &iommu->ctx[id];
+			ctx->id = id;
+			ctx->cb_num = -1;
+			ctx->name = kgsl_iommu_cbs[i].name;
+
+			break;
+		}
+	}
+
+	if (ctx == NULL) {
+		KGSL_CORE_ERR("dt: Unknown context label %s\n", node->name);
+		return -EINVAL;
+	}
+
+	if (ctx->id == KGSL_IOMMU_CONTEXT_SECURE)
+		device->mmu.secured = true;
+
+	/* this property won't be found for all context banks */
+	if (of_property_read_u32(node, "qcom,gpu-offset", &ctx->gpu_offset))
+		ctx->gpu_offset = UINT_MAX;
+
+	ctx->kgsldev = device;
+
+	/* With the arm-smmu driver we'll have the right device pointer here. */
+	if (of_find_property(node, "iommus", NULL)) {
+		ctx->dev = &pdev->dev;
+	} else {
+		ctx->dev = kgsl_mmu_get_ctx(ctx->name);
+
+		if (IS_ERR(ctx->dev))
+			return PTR_ERR(ctx->dev);
+	}
+
+	return 0;
+}
+
+static const struct {
+	char *feature;
+	int bit;
+} kgsl_iommu_features[] = {
+	{ "qcom,retention", KGSL_MMU_RETENTION },
+	{ "qcom,global_pt", KGSL_MMU_GLOBAL_PAGETABLE },
+	{ "qcom,hyp_secure_alloc", KGSL_MMU_HYP_SECURE_ALLOC },
+	{ "qcom,force-32bit", KGSL_MMU_FORCE_32BIT },
+};
+
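+/*
+ * Parse the IOMMU device tree node: the register range, the protected
+ * register region, the clocks, optional feature properties, and the child
+ * context bank nodes.
+ */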
+static int _kgsl_iommu_probe(struct kgsl_device *device,
+		struct device_node *node)
+{
+	const char *cname;
+	struct property *prop;
+	u32 reg_val[2];
+	int i = 0;
+	struct kgsl_iommu *iommu = KGSL_IOMMU_PRIV(device);
+	struct device_node *child;
+	struct platform_device *pdev = of_find_device_by_node(node);
+
+	memset(iommu, 0, sizeof(*iommu));
+
+	if (of_device_is_compatible(node, "qcom,kgsl-smmu-v1"))
+		iommu->version = 1;
+	else
+		iommu->version = 2;
+
+	if (of_property_read_u32_array(node, "reg", reg_val, 2)) {
+		KGSL_CORE_ERR("dt: Unable to read KGSL IOMMU register range\n");
+		return -EINVAL;
+	}
+	iommu->regstart = reg_val[0];
+	iommu->regsize = reg_val[1];
+
+	/* Protecting the SMMU registers is mandatory */
+	if (of_property_read_u32_array(node, "qcom,protect", reg_val, 2)) {
+		KGSL_CORE_ERR("dt: no iommu protection range specified\n");
+		return -EINVAL;
+	}
+	iommu->protect.base = reg_val[0] / sizeof(u32);
+	iommu->protect.range = ilog2(reg_val[1] / sizeof(u32));
+
+	of_property_for_each_string(node, "clock-names", prop, cname) {
+		struct clk *c = devm_clk_get(&pdev->dev, cname);
+
+		if (IS_ERR(c)) {
+			KGSL_CORE_ERR("dt: Couldn't get clock: %s\n", cname);
+			return -ENODEV;
+		}
+		if (i >= KGSL_IOMMU_MAX_CLKS) {
+			KGSL_CORE_ERR("dt: too many clocks defined.\n");
+			return -EINVAL;
+		}
+
+		iommu->clks[i] = c;
+		++i;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(kgsl_iommu_features); i++) {
+		if (of_property_read_bool(node, kgsl_iommu_features[i].feature))
+			device->mmu.features |= kgsl_iommu_features[i].bit;
+	}
+
+	if (of_property_read_u32(node, "qcom,micro-mmu-control",
+		&iommu->micro_mmu_ctrl))
+		iommu->micro_mmu_ctrl = UINT_MAX;
+
+	if (of_property_read_u32(node, "qcom,secure_align_mask",
+		&device->mmu.secure_align_mask))
+		device->mmu.secure_align_mask = 0xfff;
+
+	/* Fill out the rest of the devices in the node */
+	of_platform_populate(node, NULL, NULL, &pdev->dev);
+
+	for_each_child_of_node(node, child) {
+		int ret;
+
+		if (!of_device_is_compatible(child, "qcom,smmu-kgsl-cb"))
+			continue;
+
+		ret = _kgsl_iommu_cb_probe(device, iommu, child);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+
+static const struct {
+	char *compat;
+	int (*probe)(struct kgsl_device *device, struct device_node *node);
+} kgsl_dt_devices[] = {
+	{ "qcom,kgsl-smmu-v1", _kgsl_iommu_probe },
+	{ "qcom,kgsl-smmu-v2", _kgsl_iommu_probe },
+};
+
+static int kgsl_iommu_probe(struct kgsl_device *device)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(kgsl_dt_devices); i++) {
+		struct device_node *node;
+
+		node = of_find_compatible_node(device->pdev->dev.of_node,
+			NULL, kgsl_dt_devices[i].compat);
+
+		if (node != NULL)
+			return kgsl_dt_devices[i].probe(device, node);
+	}
+
+	return -ENODEV;
+}
+
+struct kgsl_mmu_ops kgsl_iommu_ops = {
+	.mmu_init = kgsl_iommu_init,
+	.mmu_close = kgsl_iommu_close,
+	.mmu_start = kgsl_iommu_start,
+	.mmu_stop = kgsl_iommu_stop,
+	.mmu_set_pt = kgsl_iommu_set_pt,
+	.mmu_clear_fsr = kgsl_iommu_clear_fsr,
+	.mmu_get_current_ttbr0 = kgsl_iommu_get_current_ttbr0,
+	.mmu_enable_clk = kgsl_iommu_enable_clk,
+	.mmu_disable_clk = kgsl_iommu_disable_clk,
+	.mmu_get_reg_ahbaddr = kgsl_iommu_get_reg_ahbaddr,
+	.mmu_pt_equal = kgsl_iommu_pt_equal,
+	.mmu_set_pf_policy = kgsl_iommu_set_pf_policy,
+	.mmu_pagefault_resume = kgsl_iommu_pagefault_resume,
+	.mmu_get_prot_regs = kgsl_iommu_get_prot_regs,
+	.mmu_init_pt = kgsl_iommu_init_pt,
+	.mmu_add_global = kgsl_iommu_add_global,
+	.mmu_remove_global = kgsl_iommu_remove_global,
+	.mmu_getpagetable = kgsl_iommu_getpagetable,
+	.mmu_get_qdss_global_entry = kgsl_iommu_get_qdss_global_entry,
+	.probe = kgsl_iommu_probe,
+};
+
+static struct kgsl_mmu_pt_ops iommu_pt_ops = {
+	.mmu_map = kgsl_iommu_map,
+	.mmu_unmap = kgsl_iommu_unmap,
+	.mmu_destroy_pagetable = kgsl_iommu_destroy_pagetable,
+	.get_ttbr0 = kgsl_iommu_get_ttbr0,
+	.get_contextidr = kgsl_iommu_get_contextidr,
+	.get_gpuaddr = kgsl_iommu_get_gpuaddr,
+	.put_gpuaddr = kgsl_iommu_put_gpuaddr,
+	.set_svm_region = kgsl_iommu_set_svm_region,
+	.find_svm_region = kgsl_iommu_find_svm_region,
+	.svm_range = kgsl_iommu_svm_range,
+	.addr_in_range = kgsl_iommu_addr_in_range,
+	.mmu_map_offset = kgsl_iommu_map_offset,
+	.mmu_unmap_offset = kgsl_iommu_unmap_offset,
+	.mmu_sparse_dummy_map = kgsl_iommu_sparse_dummy_map,
+};
diff --git a/drivers/gpu/msm/kgsl_iommu.h b/drivers/gpu/msm/kgsl_iommu.h
new file mode 100644
index 0000000..6337a48
--- /dev/null
+++ b/drivers/gpu/msm/kgsl_iommu.h
@@ -0,0 +1,210 @@
+/* Copyright (c) 2012-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef __KGSL_IOMMU_H
+#define __KGSL_IOMMU_H
+
+#ifdef CONFIG_QCOM_IOMMU
+#include <linux/qcom_iommu.h>
+#endif
+#include <linux/of.h>
+#include "kgsl.h"
+
+/*
+ * These defines control the address range for allocations that
+ * are mapped into all pagetables.
+ */
+#define KGSL_IOMMU_GLOBAL_MEM_SIZE	SZ_8M
+#define KGSL_IOMMU_GLOBAL_MEM_BASE	0xf8000000
+
+#define KGSL_IOMMU_SECURE_SIZE SZ_256M
+#define KGSL_IOMMU_SECURE_END KGSL_IOMMU_GLOBAL_MEM_BASE
+#define KGSL_IOMMU_SECURE_BASE	\
+	(KGSL_IOMMU_GLOBAL_MEM_BASE - KGSL_IOMMU_SECURE_SIZE)
+
+#define KGSL_IOMMU_SVM_BASE32		0x300000
+#define KGSL_IOMMU_SVM_END32		(0xC0000000 - SZ_16M)
+
+#define KGSL_IOMMU_VA_BASE64		0x500000000ULL
+#define KGSL_IOMMU_VA_END64		0x600000000ULL
+/*
+ * Note: currently we only support 36 bit addresses,
+ * but the CPU supports 39. Eventually this range
+ * should move to the high part of the 39 bit address
+ * space, just like the CPU's.
+ */
+#define KGSL_IOMMU_SVM_BASE64		0x700000000ULL
+#define KGSL_IOMMU_SVM_END64		0x800000000ULL
+
+/* TLBSTATUS register fields */
+#define KGSL_IOMMU_CTX_TLBSTATUS_SACTIVE BIT(0)
+
+/* IMPLDEF_MICRO_MMU_CTRL register fields */
+#define KGSL_IOMMU_IMPLDEF_MICRO_MMU_CTRL_HALT  0x00000004
+#define KGSL_IOMMU_IMPLDEF_MICRO_MMU_CTRL_IDLE  0x00000008
+
+/* SCTLR fields */
+#define KGSL_IOMMU_SCTLR_HUPCF_SHIFT		8
+#define KGSL_IOMMU_SCTLR_CFCFG_SHIFT		7
+#define KGSL_IOMMU_SCTLR_CFIE_SHIFT		6
+
+enum kgsl_iommu_reg_map {
+	KGSL_IOMMU_CTX_SCTLR = 0,
+	KGSL_IOMMU_CTX_TTBR0,
+	KGSL_IOMMU_CTX_CONTEXTIDR,
+	KGSL_IOMMU_CTX_FSR,
+	KGSL_IOMMU_CTX_FAR,
+	KGSL_IOMMU_CTX_TLBIALL,
+	KGSL_IOMMU_CTX_RESUME,
+	KGSL_IOMMU_CTX_FSYNR0,
+	KGSL_IOMMU_CTX_FSYNR1,
+	KGSL_IOMMU_CTX_TLBSYNC,
+	KGSL_IOMMU_CTX_TLBSTATUS,
+	KGSL_IOMMU_REG_MAX
+};
+
+/* Max number of iommu clks per IOMMU unit */
+#define KGSL_IOMMU_MAX_CLKS 5
+
+enum kgsl_iommu_context_id {
+	KGSL_IOMMU_CONTEXT_USER = 0,
+	KGSL_IOMMU_CONTEXT_SECURE = 1,
+	KGSL_IOMMU_CONTEXT_MAX,
+};
+
+/* offset at which a nop command is placed in setstate */
+#define KGSL_IOMMU_SETSTATE_NOP_OFFSET	1024
+
+/*
+ * struct kgsl_iommu_context - Structure holding data about an iommu context
+ * bank
+ * @dev: pointer to the iommu context's device
+ * @name: context name
+ * @id: Identifier of the context (user or secure), used to decide how
+ *		the context is handled.
+ * @cb_num: The hardware context bank number, used for calculating register
+ *		offsets.
+ * @kgsldev: The kgsl device that uses this context.
+ * @fault: Flag that, when set, indicates this iommu context has caused
+ *		a page fault
+ * @regbase: Virtual address of this context bank's register space
+ * @gpu_offset: Offset of this context bank in the GPU register space
+ * @default_pt: The default pagetable for this context; it may be
+ *		changed by self programming.
+ */
+struct kgsl_iommu_context {
+	struct device *dev;
+	const char *name;
+	enum kgsl_iommu_context_id id;
+	unsigned int cb_num;
+	struct kgsl_device *kgsldev;
+	int fault;
+	void __iomem *regbase;
+	unsigned int gpu_offset;
+	struct kgsl_pagetable *default_pt;
+};
+
+/*
+ * struct kgsl_iommu - Structure holding iommu data for kgsl driver
+ * @ctx: Array of kgsl_iommu_context structs
+ * @regbase: Virtual address of the IOMMU register base
+ * @regstart: Physical address of the iommu registers
+ * @regsize: Length of the iommu register region.
+ * @setstate: Scratch GPU memory for IOMMU operations
+ * @clk_enable_count: The ref count of clock enable calls
+ * @clks: Array of pointers to IOMMU clocks
+ * @micro_mmu_ctrl: GPU register offset of this global register
+ * @smmu_info: smmu info used in a5xx preemption
+ * @version: IOMMU hardware version (1 for qcom,kgsl-smmu-v1, otherwise 2)
+ * @protect: register protection settings for the iommu.
+ * @pagefault_suppression_count: Total number of pagefaults
+ *				 suppressed since boot.
+ */
+struct kgsl_iommu {
+	struct kgsl_iommu_context ctx[KGSL_IOMMU_CONTEXT_MAX];
+	void __iomem *regbase;
+	unsigned long regstart;
+	unsigned int regsize;
+	struct kgsl_memdesc setstate;
+	atomic_t clk_enable_count;
+	struct clk *clks[KGSL_IOMMU_MAX_CLKS];
+	unsigned int micro_mmu_ctrl;
+	struct kgsl_memdesc smmu_info;
+	unsigned int version;
+	struct kgsl_protected_registers protect;
+	u32 pagefault_suppression_count;
+};
+
+/*
+ * struct kgsl_iommu_pt - Iommu pagetable structure private to kgsl driver
+ * @domain: Pointer to the iommu domain that contains the iommu pagetable
+ * @ttbr0: register value to set when using this pagetable
+ * @contextidr: register value to set when using this pagetable
+ * @attached: is the pagetable attached?
+ * @rbtree: all buffers mapped into the pagetable, indexed by gpuaddr
+ * @va_start: Start of virtual range used in this pagetable.
+ * @va_end: End of virtual range.
+ * @svm_start: Start of shared virtual memory range. Addresses in this
+ *		range are also valid in the process's CPU address space.
+ * @svm_end: End of the shared virtual memory range.
+ * @compat_va_start: Start of the 32 bit compatible SVM range, for
+ *		legacy clients that lack 64 bit addressing
+ * @compat_va_end: End of the 32 bit compatible SVM range
+ */
+struct kgsl_iommu_pt {
+	struct iommu_domain *domain;
+	u64 ttbr0;
+	u32 contextidr;
+	bool attached;
+
+	struct rb_root rbtree;
+
+	uint64_t va_start;
+	uint64_t va_end;
+	uint64_t svm_start;
+	uint64_t svm_end;
+	uint64_t compat_va_start;
+	uint64_t compat_va_end;
+};
+
+/*
+ * offset of context bank 0 from the start of the SMMU register space.
+ */
+#define KGSL_IOMMU_CB0_OFFSET		0x8000
+/* size of each context bank's register space */
+#define KGSL_IOMMU_CB_SHIFT		12
+
+/* Macros to read/write IOMMU registers */
+extern const unsigned int kgsl_iommu_reg_list[KGSL_IOMMU_REG_MAX];
+
+/*
+ * Don't use this function directly. Use the macros below to read/write
+ * IOMMU registers.
+ */
+static inline void __iomem *
+kgsl_iommu_reg(struct kgsl_iommu_context *ctx, enum kgsl_iommu_reg_map reg)
+{
+	return ctx->regbase + kgsl_iommu_reg_list[reg];
+}
+
+#define KGSL_IOMMU_SET_CTX_REG_Q(_ctx, REG, val) \
+		writeq_relaxed((val), \
+			kgsl_iommu_reg((_ctx), KGSL_IOMMU_CTX_##REG))
+
+#define KGSL_IOMMU_GET_CTX_REG_Q(_ctx, REG) \
+		readq_relaxed(kgsl_iommu_reg((_ctx), KGSL_IOMMU_CTX_##REG))
+
+#define KGSL_IOMMU_SET_CTX_REG(_ctx, REG, val) \
+		writel_relaxed((val), \
+			kgsl_iommu_reg((_ctx), KGSL_IOMMU_CTX_##REG))
+
+#define KGSL_IOMMU_GET_CTX_REG(_ctx, REG) \
+		readl_relaxed(kgsl_iommu_reg((_ctx), KGSL_IOMMU_CTX_##REG))
+
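+/*
+ * Example usage of the accessor macros above (illustrative only; "ctx"
+ * is assumed to be a valid struct kgsl_iommu_context pointer):
+ *
+ *	u64 ttbr0 = KGSL_IOMMU_GET_CTX_REG_Q(ctx, TTBR0);
+ *
+ *	KGSL_IOMMU_SET_CTX_REG(ctx, TLBIALL, 1);
+ */
+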
+
+#endif
diff --git a/drivers/gpu/msm/kgsl_log.h b/drivers/gpu/msm/kgsl_log.h
new file mode 100644
index 0000000..d79a410
--- /dev/null
+++ b/drivers/gpu/msm/kgsl_log.h
@@ -0,0 +1,109 @@
+/* Copyright (c) 2002,2008-2011,2013-2014,2016 The Linux Foundation.
+ * All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef __KGSL_LOG_H
+#define __KGSL_LOG_H
+
+#define KGSL_LOG_INFO(dev, lvl, fmt, args...) \
+	do { \
+		if ((lvl) >= 6)  \
+			dev_info(dev, "|%s| " fmt, \
+					__func__, ##args);\
+	} while (0)
+
+#define KGSL_LOG_WARN(dev, lvl, fmt, args...) \
+	do { \
+		if ((lvl) >= 4)  \
+			dev_warn(dev, "|%s| " fmt, \
+					__func__, ##args);\
+	} while (0)
+
+#define KGSL_LOG_ERR(dev, lvl, fmt, args...) \
+	do { \
+		if ((lvl) >= 3)  \
+			dev_err(dev, "|%s| " fmt, \
+					__func__, ##args);\
+	} while (0)
+
+#define KGSL_LOG_CRIT(dev, lvl, fmt, args...) \
+	do { \
+		if ((lvl) >= 2) \
+			dev_crit(dev, "|%s| " fmt, \
+					__func__, ##args);\
+	} while (0)
+
+#define KGSL_LOG_FATAL(dev, lvl, fmt, args...) \
+	do { \
+		dev_crit(dev, "|%s| " fmt, __func__, ##args);\
+		BUG(); \
+	} while (0)
+
+#define KGSL_LOG_DUMP(_dev, fmt, args...)	dev_err(_dev->dev, fmt, ##args)
+
+#define KGSL_DEV_ERR_ONCE(_dev, fmt, args...) \
+({ \
+	static bool kgsl_dev_err_once; \
+							\
+	if (!kgsl_dev_err_once) { \
+		kgsl_dev_err_once = true; \
+		dev_crit(_dev->dev, "|%s| " fmt, __func__, ##args); \
+	} \
+})
+
+#define KGSL_LOG_CRIT_RATELIMITED(dev, lvl, fmt, args...) \
+	do { \
+		if ((lvl) >= 2) \
+			dev_crit_ratelimited(dev, "|%s| " fmt, \
+					__func__, ##args);\
+	} while (0)
+
+#define KGSL_DRV_INFO(_dev, fmt, args...) \
+KGSL_LOG_INFO(_dev->dev, _dev->drv_log, fmt, ##args)
+#define KGSL_DRV_WARN(_dev, fmt, args...) \
+KGSL_LOG_WARN(_dev->dev, _dev->drv_log, fmt, ##args)
+#define KGSL_DRV_ERR(_dev, fmt, args...)  \
+KGSL_LOG_ERR(_dev->dev, _dev->drv_log, fmt, ##args)
+#define KGSL_DRV_CRIT(_dev, fmt, args...) \
+KGSL_LOG_CRIT(_dev->dev, _dev->drv_log, fmt, ##args)
+#define KGSL_DRV_CRIT_RATELIMIT(_dev, fmt, args...) \
+KGSL_LOG_CRIT_RATELIMITED(_dev->dev, _dev->drv_log, fmt, ##args)
+#define KGSL_DRV_FATAL(_dev, fmt, args...) \
+KGSL_LOG_FATAL((_dev)->dev, (_dev)->drv_log, fmt, ##args)
+
+#define KGSL_MEM_INFO(_dev, fmt, args...) \
+KGSL_LOG_INFO(_dev->dev, _dev->mem_log, fmt, ##args)
+#define KGSL_MEM_WARN(_dev, fmt, args...) \
+KGSL_LOG_WARN(_dev->dev, _dev->mem_log, fmt, ##args)
+#define KGSL_MEM_ERR(_dev, fmt, args...)  \
+KGSL_LOG_ERR(_dev->dev, _dev->mem_log, fmt, ##args)
+#define KGSL_MEM_CRIT(_dev, fmt, args...) \
+KGSL_LOG_CRIT(_dev->dev, _dev->mem_log, fmt, ##args)
+
+#define KGSL_PWR_INFO(_dev, fmt, args...) \
+KGSL_LOG_INFO(_dev->dev, _dev->pwr_log, fmt, ##args)
+#define KGSL_PWR_WARN(_dev, fmt, args...) \
+KGSL_LOG_WARN(_dev->dev, _dev->pwr_log, fmt, ##args)
+#define KGSL_PWR_ERR(_dev, fmt, args...) \
+KGSL_LOG_ERR(_dev->dev, _dev->pwr_log, fmt, ##args)
+#define KGSL_PWR_CRIT(_dev, fmt, args...) \
+KGSL_LOG_CRIT(_dev->dev, _dev->pwr_log, fmt, ##args)
+
+/*
+ * Core error messages - these are for core KGSL functions that have
+ * no device associated with them (such as memory)
+ */
+
+#define KGSL_CORE_ERR(fmt, args...) \
+pr_err("kgsl: %s: " fmt, __func__, ##args)
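+
+/*
+ * Example usage (illustrative; "device" is assumed to be a valid
+ * struct kgsl_device pointer providing the dev and per-log-type
+ * verbosity fields used by the macros above):
+ *
+ *	KGSL_DRV_ERR(device, "GPU recovery failed: %d\n", ret);
+ *	KGSL_CORE_ERR("kernel mapping failed for size %zu\n", size);
+ */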
+
+#endif /* __KGSL_LOG_H */
diff --git a/drivers/gpu/msm/kgsl_mmu.c b/drivers/gpu/msm/kgsl_mmu.c
new file mode 100644
index 0000000..9e516e1
--- /dev/null
+++ b/drivers/gpu/msm/kgsl_mmu.c
@@ -0,0 +1,766 @@
+/* Copyright (c) 2002,2007-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#include <linux/export.h>
+#include <linux/types.h>
+#include <linux/device.h>
+#include <linux/spinlock.h>
+#include <linux/genalloc.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+
+#include "kgsl.h"
+#include "kgsl_mmu.h"
+#include "kgsl_device.h"
+#include "kgsl_sharedmem.h"
+
+static void pagetable_remove_sysfs_objects(struct kgsl_pagetable *pagetable);
+
+static void kgsl_destroy_pagetable(struct kref *kref)
+{
+	struct kgsl_pagetable *pagetable = container_of(kref,
+		struct kgsl_pagetable, refcount);
+
+	kgsl_mmu_detach_pagetable(pagetable);
+
+	if (PT_OP_VALID(pagetable, mmu_destroy_pagetable))
+		pagetable->pt_ops->mmu_destroy_pagetable(pagetable);
+
+	kfree(pagetable);
+}
+
+static inline void kgsl_put_pagetable(struct kgsl_pagetable *pagetable)
+{
+	if (pagetable)
+		kref_put(&pagetable->refcount, kgsl_destroy_pagetable);
+}
+
+struct kgsl_pagetable *
+kgsl_get_pagetable(unsigned long name)
+{
+	struct kgsl_pagetable *pt, *ret = NULL;
+	unsigned long flags;
+
+	spin_lock_irqsave(&kgsl_driver.ptlock, flags);
+	list_for_each_entry(pt, &kgsl_driver.pagetable_list, list) {
+		if (name == pt->name && kref_get_unless_zero(&pt->refcount)) {
+			ret = pt;
+			break;
+		}
+	}
+
+	spin_unlock_irqrestore(&kgsl_driver.ptlock, flags);
+	return ret;
+}
+
+static struct kgsl_pagetable *
+_get_pt_from_kobj(struct kobject *kobj)
+{
+	unsigned int ptname;
+
+	if (!kobj)
+		return NULL;
+
+	if (kstrtou32(kobj->name, 0, &ptname))
+		return NULL;
+
+	return kgsl_get_pagetable(ptname);
+}
+
+static ssize_t
+sysfs_show_entries(struct kobject *kobj,
+		   struct kobj_attribute *attr,
+		   char *buf)
+{
+	struct kgsl_pagetable *pt;
+	int ret = 0;
+
+	pt = _get_pt_from_kobj(kobj);
+
+	if (pt) {
+		unsigned int val = atomic_read(&pt->stats.entries);
+
+		ret += snprintf(buf, PAGE_SIZE, "%d\n", val);
+	}
+
+	kgsl_put_pagetable(pt);
+	return ret;
+}
+
+static ssize_t
+sysfs_show_mapped(struct kobject *kobj,
+		  struct kobj_attribute *attr,
+		  char *buf)
+{
+	struct kgsl_pagetable *pt;
+	int ret = 0;
+
+	pt = _get_pt_from_kobj(kobj);
+
+	if (pt) {
+		uint64_t val = atomic_long_read(&pt->stats.mapped);
+
+		ret += snprintf(buf, PAGE_SIZE, "%llu\n", val);
+	}
+
+	kgsl_put_pagetable(pt);
+	return ret;
+}
+
+static ssize_t
+sysfs_show_max_mapped(struct kobject *kobj,
+		      struct kobj_attribute *attr,
+		      char *buf)
+{
+	struct kgsl_pagetable *pt;
+	int ret = 0;
+
+	pt = _get_pt_from_kobj(kobj);
+
+	if (pt) {
+		uint64_t val = atomic_long_read(&pt->stats.max_mapped);
+
+		ret += snprintf(buf, PAGE_SIZE, "%llu\n", val);
+	}
+
+	kgsl_put_pagetable(pt);
+	return ret;
+}
+
+static struct kobj_attribute attr_entries = {
+	.attr = { .name = "entries", .mode = 0444 },
+	.show = sysfs_show_entries,
+	.store = NULL,
+};
+
+static struct kobj_attribute attr_mapped = {
+	.attr = { .name = "mapped", .mode = 0444 },
+	.show = sysfs_show_mapped,
+	.store = NULL,
+};
+
+static struct kobj_attribute attr_max_mapped = {
+	.attr = { .name = "max_mapped", .mode = 0444 },
+	.show = sysfs_show_max_mapped,
+	.store = NULL,
+};
+
+static struct attribute *pagetable_attrs[] = {
+	&attr_entries.attr,
+	&attr_mapped.attr,
+	&attr_max_mapped.attr,
+	NULL,
+};
+
+static struct attribute_group pagetable_attr_group = {
+	.attrs = pagetable_attrs,
+};
+
+static void
+pagetable_remove_sysfs_objects(struct kgsl_pagetable *pagetable)
+{
+	if (pagetable->kobj)
+		sysfs_remove_group(pagetable->kobj,
+				   &pagetable_attr_group);
+
+	kobject_put(pagetable->kobj);
+	pagetable->kobj = NULL;
+}
+
+static int
+pagetable_add_sysfs_objects(struct kgsl_pagetable *pagetable)
+{
+	char ptname[16];
+	int ret = -ENOMEM;
+
+	snprintf(ptname, sizeof(ptname), "%d", pagetable->name);
+	pagetable->kobj = kobject_create_and_add(ptname,
+						 kgsl_driver.ptkobj);
+	if (pagetable->kobj == NULL)
+		goto err;
+
+	ret = sysfs_create_group(pagetable->kobj, &pagetable_attr_group);
+
+err:
+	if (ret) {
+		if (pagetable->kobj)
+			kobject_put(pagetable->kobj);
+
+		pagetable->kobj = NULL;
+	}
+
+	return ret;
+}
+
+void
+kgsl_mmu_detach_pagetable(struct kgsl_pagetable *pagetable)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&kgsl_driver.ptlock, flags);
+
+	if (!list_empty(&pagetable->list))
+		list_del_init(&pagetable->list);
+
+	spin_unlock_irqrestore(&kgsl_driver.ptlock, flags);
+
+	pagetable_remove_sysfs_objects(pagetable);
+}
+
+struct kgsl_pagetable *kgsl_mmu_get_pt_from_ptname(struct kgsl_mmu *mmu,
+						int ptname)
+{
+	struct kgsl_pagetable *pt;
+
+	spin_lock(&kgsl_driver.ptlock);
+	list_for_each_entry(pt, &kgsl_driver.pagetable_list, list) {
+		if (pt->name == ptname) {
+			spin_unlock(&kgsl_driver.ptlock);
+			return pt;
+		}
+	}
+	spin_unlock(&kgsl_driver.ptlock);
+	return NULL;
+
+}
+EXPORT_SYMBOL(kgsl_mmu_get_pt_from_ptname);
+
+unsigned int
+kgsl_mmu_log_fault_addr(struct kgsl_mmu *mmu, u64 pt_base,
+		uint64_t addr)
+{
+	struct kgsl_pagetable *pt;
+	unsigned int ret = 0;
+
+	if (!MMU_OP_VALID(mmu, mmu_pt_equal))
+		return 0;
+
+	spin_lock(&kgsl_driver.ptlock);
+	list_for_each_entry(pt, &kgsl_driver.pagetable_list, list) {
+		if (mmu->mmu_ops->mmu_pt_equal(mmu, pt, pt_base)) {
+			if ((addr & ~(PAGE_SIZE-1)) == pt->fault_addr) {
+				ret = 1;
+				break;
+			}
+			pt->fault_addr = (addr & ~(PAGE_SIZE-1));
+			ret = 0;
+			break;
+		}
+	}
+	spin_unlock(&kgsl_driver.ptlock);
+
+	return ret;
+}
+EXPORT_SYMBOL(kgsl_mmu_log_fault_addr);
+
+int kgsl_mmu_init(struct kgsl_device *device)
+{
+	struct kgsl_mmu *mmu = &device->mmu;
+
+	if (MMU_OP_VALID(mmu, mmu_init))
+		return mmu->mmu_ops->mmu_init(mmu);
+
+	return 0;
+}
+EXPORT_SYMBOL(kgsl_mmu_init);
+
+int kgsl_mmu_start(struct kgsl_device *device)
+{
+	struct kgsl_mmu *mmu = &device->mmu;
+
+	if (MMU_OP_VALID(mmu, mmu_start))
+		return mmu->mmu_ops->mmu_start(mmu);
+
+	return 0;
+}
+EXPORT_SYMBOL(kgsl_mmu_start);
+
+struct kgsl_pagetable *
+kgsl_mmu_createpagetableobject(struct kgsl_mmu *mmu, unsigned int name)
+{
+	int status = 0;
+	struct kgsl_pagetable *pagetable = NULL;
+	unsigned long flags;
+
+	pagetable = kzalloc(sizeof(struct kgsl_pagetable), GFP_KERNEL);
+	if (pagetable == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	kref_init(&pagetable->refcount);
+
+	spin_lock_init(&pagetable->lock);
+
+	pagetable->mmu = mmu;
+	pagetable->name = name;
+
+	atomic_set(&pagetable->stats.entries, 0);
+	atomic_long_set(&pagetable->stats.mapped, 0);
+	atomic_long_set(&pagetable->stats.max_mapped, 0);
+
+	if (MMU_OP_VALID(mmu, mmu_init_pt)) {
+		status = mmu->mmu_ops->mmu_init_pt(mmu, pagetable);
+		if (status) {
+			kfree(pagetable);
+			return ERR_PTR(status);
+		}
+	}
+
+	spin_lock_irqsave(&kgsl_driver.ptlock, flags);
+	list_add(&pagetable->list, &kgsl_driver.pagetable_list);
+	spin_unlock_irqrestore(&kgsl_driver.ptlock, flags);
+
+	/* Create the sysfs entries */
+	pagetable_add_sysfs_objects(pagetable);
+
+	return pagetable;
+}
+
+void kgsl_mmu_putpagetable(struct kgsl_pagetable *pagetable)
+{
+	kgsl_put_pagetable(pagetable);
+}
+EXPORT_SYMBOL(kgsl_mmu_putpagetable);
+
+/**
+ * kgsl_mmu_find_svm_region() - Find an empty spot in the SVM region
+ * @pagetable: KGSL pagetable to search
+ * @start: start of search range, must be within kgsl_mmu_svm_range()
+ * @end: end of search range, must be within kgsl_mmu_svm_range()
+ * @size: Size of the region to find
+ * @align: Desired alignment of the address
+ */
+uint64_t kgsl_mmu_find_svm_region(struct kgsl_pagetable *pagetable,
+		uint64_t start, uint64_t end, uint64_t size,
+		uint64_t align)
+{
+	if (PT_OP_VALID(pagetable, find_svm_region))
+		return pagetable->pt_ops->find_svm_region(pagetable, start,
+			end, size, align);
+	return -ENOMEM;
+}
+
+/**
+ * kgsl_mmu_set_svm_region() - Check if a region is empty and reserve it if so
+ * @pagetable: KGSL pagetable to search
+ * @gpuaddr: GPU address to check/reserve
+ * @size: Size of the region to check/reserve
+ */
+int kgsl_mmu_set_svm_region(struct kgsl_pagetable *pagetable, uint64_t gpuaddr,
+		uint64_t size)
+{
+	if (PT_OP_VALID(pagetable, set_svm_region))
+		return pagetable->pt_ops->set_svm_region(pagetable, gpuaddr,
+			size);
+	return -ENOMEM;
+}
+
+/**
+ * kgsl_mmu_get_gpuaddr() - Assign a GPU address to the memdesc
+ * @pagetable: GPU pagetable to assign the address in
+ * @memdesc: mem descriptor to assign the memory to
+ */
+int
+kgsl_mmu_get_gpuaddr(struct kgsl_pagetable *pagetable,
+		struct kgsl_memdesc *memdesc)
+{
+	if (PT_OP_VALID(pagetable, get_gpuaddr))
+		return pagetable->pt_ops->get_gpuaddr(pagetable, memdesc);
+
+	return -ENOMEM;
+}
+EXPORT_SYMBOL(kgsl_mmu_get_gpuaddr);
+
+int
+kgsl_mmu_map(struct kgsl_pagetable *pagetable,
+				struct kgsl_memdesc *memdesc)
+{
+	int size;
+
+	if (!memdesc->gpuaddr)
+		return -EINVAL;
+	if (!(memdesc->flags & (KGSL_MEMFLAGS_SPARSE_VIRT |
+					KGSL_MEMFLAGS_SPARSE_PHYS))) {
+		/* Only global mappings should be mapped multiple times */
+		if (!kgsl_memdesc_is_global(memdesc) &&
+				(KGSL_MEMDESC_MAPPED & memdesc->priv))
+			return -EINVAL;
+	}
+
+	size = kgsl_memdesc_footprint(memdesc);
+
+	if (PT_OP_VALID(pagetable, mmu_map)) {
+		int ret;
+
+		ret = pagetable->pt_ops->mmu_map(pagetable, memdesc);
+		if (ret)
+			return ret;
+
+		atomic_inc(&pagetable->stats.entries);
+		KGSL_STATS_ADD(size, &pagetable->stats.mapped,
+				&pagetable->stats.max_mapped);
+
+		/* This is needed for non-sparse mappings */
+		memdesc->priv |= KGSL_MEMDESC_MAPPED;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(kgsl_mmu_map);
+
+/**
+ * kgsl_mmu_put_gpuaddr() - Remove a GPU address from a pagetable
+ * @memdesc: Memory descriptor containing the GPU address to free; the
+ *	pagetable is taken from memdesc->pagetable
+ */
+void kgsl_mmu_put_gpuaddr(struct kgsl_memdesc *memdesc)
+{
+	struct kgsl_pagetable *pagetable = memdesc->pagetable;
+	int unmap_fail = 0;
+
+	if (memdesc->size == 0 || memdesc->gpuaddr == 0)
+		return;
+
+	if (!kgsl_memdesc_is_global(memdesc))
+		unmap_fail = kgsl_mmu_unmap(pagetable, memdesc);
+
+	/*
+	 * Do not free the gpuaddr/size if unmap fails. Because if we
+	 * try to map this range in future, the iommu driver will throw
+	 * a BUG_ON() because it feels we are overwriting a mapping.
+	 */
+	if (PT_OP_VALID(pagetable, put_gpuaddr) && (unmap_fail == 0))
+		pagetable->pt_ops->put_gpuaddr(memdesc);
+
+	if (!kgsl_memdesc_is_global(memdesc))
+		memdesc->gpuaddr = 0;
+
+	memdesc->pagetable = NULL;
+}
+EXPORT_SYMBOL(kgsl_mmu_put_gpuaddr);
+
+/**
+ * kgsl_mmu_svm_range() - Return the range for SVM (if applicable)
+ * @pagetable: Pagetable to query the range from
+ * @lo: Pointer to store the start of the SVM range
+ * @hi: Pointer to store the end of the SVM range
+ * @memflags: Flags from the buffer we are mapping
+ */
+int kgsl_mmu_svm_range(struct kgsl_pagetable *pagetable,
+		uint64_t *lo, uint64_t *hi, uint64_t memflags)
+{
+	if (PT_OP_VALID(pagetable, svm_range))
+		return pagetable->pt_ops->svm_range(pagetable, lo, hi,
+			memflags);
+
+	return -ENODEV;
+}
+EXPORT_SYMBOL(kgsl_mmu_svm_range);
+
+int
+kgsl_mmu_unmap(struct kgsl_pagetable *pagetable,
+		struct kgsl_memdesc *memdesc)
+{
+	int ret = 0;
+
+	if (memdesc->size == 0)
+		return -EINVAL;
+
+	if (!(memdesc->flags & (KGSL_MEMFLAGS_SPARSE_VIRT |
+					KGSL_MEMFLAGS_SPARSE_PHYS))) {
+		/* Only global mappings should be mapped multiple times */
+		if (!(KGSL_MEMDESC_MAPPED & memdesc->priv))
+			return -EINVAL;
+	}
+
+	if (PT_OP_VALID(pagetable, mmu_unmap)) {
+		uint64_t size;
+
+		size = kgsl_memdesc_footprint(memdesc);
+
+		ret = pagetable->pt_ops->mmu_unmap(pagetable, memdesc);
+
+		atomic_dec(&pagetable->stats.entries);
+		atomic_long_sub(size, &pagetable->stats.mapped);
+
+		if (!kgsl_memdesc_is_global(memdesc))
+			memdesc->priv &= ~KGSL_MEMDESC_MAPPED;
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL(kgsl_mmu_unmap);
+
+int kgsl_mmu_map_offset(struct kgsl_pagetable *pagetable,
+			uint64_t virtaddr, uint64_t virtoffset,
+			struct kgsl_memdesc *memdesc, uint64_t physoffset,
+			uint64_t size, uint64_t flags)
+{
+	if (PT_OP_VALID(pagetable, mmu_map_offset)) {
+		int ret;
+
+		ret = pagetable->pt_ops->mmu_map_offset(pagetable, virtaddr,
+				virtoffset, memdesc, physoffset, size, flags);
+		if (ret)
+			return ret;
+
+		atomic_inc(&pagetable->stats.entries);
+		KGSL_STATS_ADD(size, &pagetable->stats.mapped,
+				&pagetable->stats.max_mapped);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(kgsl_mmu_map_offset);
+
+int kgsl_mmu_unmap_offset(struct kgsl_pagetable *pagetable,
+		struct kgsl_memdesc *memdesc, uint64_t addr, uint64_t offset,
+		uint64_t size)
+{
+	if (PT_OP_VALID(pagetable, mmu_unmap_offset)) {
+		int ret;
+
+		ret = pagetable->pt_ops->mmu_unmap_offset(pagetable, memdesc,
+				addr, offset, size);
+		if (ret)
+			return ret;
+
+		atomic_dec(&pagetable->stats.entries);
+		atomic_long_sub(size, &pagetable->stats.mapped);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(kgsl_mmu_unmap_offset);
+
+int kgsl_mmu_sparse_dummy_map(struct kgsl_pagetable *pagetable,
+		struct kgsl_memdesc *memdesc, uint64_t offset, uint64_t size)
+{
+	if (PT_OP_VALID(pagetable, mmu_sparse_dummy_map)) {
+		int ret;
+
+		ret = pagetable->pt_ops->mmu_sparse_dummy_map(pagetable,
+				memdesc, offset, size);
+		if (ret)
+			return ret;
+
+		atomic_dec(&pagetable->stats.entries);
+		atomic_long_sub(size, &pagetable->stats.mapped);
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(kgsl_mmu_sparse_dummy_map);
+
+void kgsl_mmu_remove_global(struct kgsl_device *device,
+		struct kgsl_memdesc *memdesc)
+{
+	struct kgsl_mmu *mmu = &device->mmu;
+
+	if (MMU_OP_VALID(mmu, mmu_remove_global))
+		mmu->mmu_ops->mmu_remove_global(mmu, memdesc);
+}
+EXPORT_SYMBOL(kgsl_mmu_remove_global);
+
+void kgsl_mmu_add_global(struct kgsl_device *device,
+		struct kgsl_memdesc *memdesc, const char *name)
+{
+	struct kgsl_mmu *mmu = &device->mmu;
+
+	if (MMU_OP_VALID(mmu, mmu_add_global))
+		mmu->mmu_ops->mmu_add_global(mmu, memdesc, name);
+}
+EXPORT_SYMBOL(kgsl_mmu_add_global);
+
+void kgsl_mmu_close(struct kgsl_device *device)
+{
+	struct kgsl_mmu *mmu = &(device->mmu);
+
+	if (MMU_OP_VALID(mmu, mmu_close))
+		mmu->mmu_ops->mmu_close(mmu);
+}
+EXPORT_SYMBOL(kgsl_mmu_close);
+
+enum kgsl_mmutype kgsl_mmu_get_mmutype(struct kgsl_device *device)
+{
+	return device ? device->mmu.type : KGSL_MMU_TYPE_NONE;
+}
+EXPORT_SYMBOL(kgsl_mmu_get_mmutype);
+
+bool kgsl_mmu_gpuaddr_in_range(struct kgsl_pagetable *pagetable,
+		uint64_t gpuaddr)
+{
+	if (PT_OP_VALID(pagetable, addr_in_range))
+		return pagetable->pt_ops->addr_in_range(pagetable, gpuaddr);
+
+	return false;
+}
+EXPORT_SYMBOL(kgsl_mmu_gpuaddr_in_range);
+
+struct kgsl_memdesc *kgsl_mmu_get_qdss_global_entry(struct kgsl_device *device)
+{
+	struct kgsl_mmu *mmu = &device->mmu;
+
+	if (MMU_OP_VALID(mmu, mmu_get_qdss_global_entry))
+		return mmu->mmu_ops->mmu_get_qdss_global_entry();
+
+	return NULL;
+}
+EXPORT_SYMBOL(kgsl_mmu_get_qdss_global_entry);
+
+/*
+ * NOMMU definitions - NOMMU really just means that the MMU is kept in pass
+ * through and the GPU directly accesses physical memory. Used in debug mode
+ * and when a real MMU isn't up and running yet.
+ */
+
+static bool nommu_gpuaddr_in_range(struct kgsl_pagetable *pagetable,
+		uint64_t gpuaddr)
+{
+	return (gpuaddr != 0) ? true : false;
+}
+
+static int nommu_get_gpuaddr(struct kgsl_pagetable *pagetable,
+		struct kgsl_memdesc *memdesc)
+{
+	if (memdesc->sgt->nents > 1) {
+		WARN_ONCE(1,
+			"Attempt to map non-contiguous memory with NOMMU\n");
+		return -EINVAL;
+	}
+
+	memdesc->gpuaddr = (uint64_t) sg_phys(memdesc->sgt->sgl);
+
+	if (memdesc->gpuaddr) {
+		memdesc->pagetable = pagetable;
+		return 0;
+	}
+
+	return -ENOMEM;
+}
+
+static struct kgsl_mmu_pt_ops nommu_pt_ops = {
+	.get_gpuaddr = nommu_get_gpuaddr,
+	.addr_in_range = nommu_gpuaddr_in_range,
+};
+
+static void nommu_add_global(struct kgsl_mmu *mmu,
+		struct kgsl_memdesc *memdesc, const char *name)
+{
+	memdesc->gpuaddr = (uint64_t) sg_phys(memdesc->sgt->sgl);
+}
+
+static void nommu_remove_global(struct kgsl_mmu *mmu,
+		struct kgsl_memdesc *memdesc)
+{
+	memdesc->gpuaddr = 0;
+}
+
+static int nommu_init_pt(struct kgsl_mmu *mmu, struct kgsl_pagetable *pt)
+{
+	if (pt == NULL)
+		return -EINVAL;
+
+	pt->pt_ops = &nommu_pt_ops;
+	return 0;
+}
+
+static struct kgsl_pagetable *nommu_getpagetable(struct kgsl_mmu *mmu,
+		unsigned long name)
+{
+	struct kgsl_pagetable *pagetable;
+
+	pagetable = kgsl_get_pagetable(KGSL_MMU_GLOBAL_PT);
+
+	if (pagetable == NULL)
+		pagetable = kgsl_mmu_createpagetableobject(mmu,
+			KGSL_MMU_GLOBAL_PT);
+
+	return pagetable;
+}
+
+static int nommu_init(struct kgsl_mmu *mmu)
+{
+	mmu->features |= KGSL_MMU_GLOBAL_PAGETABLE;
+	return 0;
+}
+
+static int nommu_probe(struct kgsl_device *device)
+{
+	/* NOMMU always exists */
+	return 0;
+}
+
+static struct kgsl_mmu_ops kgsl_nommu_ops = {
+	.mmu_init = nommu_init,
+	.mmu_add_global = nommu_add_global,
+	.mmu_remove_global = nommu_remove_global,
+	.mmu_init_pt = nommu_init_pt,
+	.mmu_getpagetable = nommu_getpagetable,
+	.probe = nommu_probe,
+};
+
+static struct {
+	const char *name;
+	unsigned int type;
+	struct kgsl_mmu_ops *ops;
+} kgsl_mmu_subtypes[] = {
+#ifdef CONFIG_QCOM_KGSL_IOMMU
+	{ "iommu", KGSL_MMU_TYPE_IOMMU, &kgsl_iommu_ops },
+#endif
+	{ "nommu", KGSL_MMU_TYPE_NONE, &kgsl_nommu_ops },
+};
+
+int kgsl_mmu_probe(struct kgsl_device *device, char *mmutype)
+{
+	struct kgsl_mmu *mmu = &device->mmu;
+	int ret, i;
+
+	if (mmutype != NULL) {
+		for (i = 0; i < ARRAY_SIZE(kgsl_mmu_subtypes); i++) {
+			if (strcmp(kgsl_mmu_subtypes[i].name, mmutype))
+				continue;
+
+			ret = kgsl_mmu_subtypes[i].ops->probe(device);
+
+			if (ret == 0) {
+				mmu->type = kgsl_mmu_subtypes[i].type;
+				mmu->mmu_ops = kgsl_mmu_subtypes[i].ops;
+
+				if (MMU_OP_VALID(mmu, mmu_init))
+					return mmu->mmu_ops->mmu_init(mmu);
+			}
+
+			return ret;
+		}
+
+		KGSL_CORE_ERR("mmu: MMU type '%s' unknown\n", mmutype);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(kgsl_mmu_subtypes); i++) {
+		ret = kgsl_mmu_subtypes[i].ops->probe(device);
+
+		if (ret == 0) {
+			mmu->type = kgsl_mmu_subtypes[i].type;
+			mmu->mmu_ops = kgsl_mmu_subtypes[i].ops;
+
+			if (MMU_OP_VALID(mmu, mmu_init))
+				return mmu->mmu_ops->mmu_init(mmu);
+
+			return 0;
+		}
+	}
+
+	KGSL_CORE_ERR("mmu: couldn't detect any known MMU types\n");
+	return -ENODEV;
+}
+EXPORT_SYMBOL(kgsl_mmu_probe);
diff --git a/drivers/gpu/msm/kgsl_mmu.h b/drivers/gpu/msm/kgsl_mmu.h
new file mode 100644
index 0000000..0f9f486
--- /dev/null
+++ b/drivers/gpu/msm/kgsl_mmu.h
@@ -0,0 +1,430 @@
+/* Copyright (c) 2002,2007-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef __KGSL_MMU_H
+#define __KGSL_MMU_H
+
+#include "kgsl_iommu.h"
+
+/*
+ * Identifier for the global page table. Per-process page tables will
+ * probably pass in the thread group as an identifier.
+ */
+#define KGSL_MMU_GLOBAL_PT 0
+#define KGSL_MMU_SECURE_PT 1
+
+#define MMU_DEFAULT_TTBR0(_d) \
+	(kgsl_mmu_pagetable_get_ttbr0((_d)->mmu.defaultpagetable))
+
+#define MMU_DEFAULT_CONTEXTIDR(_d) \
+	(kgsl_mmu_pagetable_get_contextidr((_d)->mmu.defaultpagetable))
+
+struct kgsl_device;
+
+enum kgsl_mmutype {
+	KGSL_MMU_TYPE_IOMMU = 0,
+	KGSL_MMU_TYPE_NONE
+};
+
+struct kgsl_pagetable {
+	spinlock_t lock;
+	struct kref refcount;
+	struct list_head list;
+	unsigned int name;
+	struct kobject *kobj;
+
+	struct {
+		atomic_t entries;
+		atomic_long_t mapped;
+		atomic_long_t max_mapped;
+	} stats;
+	const struct kgsl_mmu_pt_ops *pt_ops;
+	uint64_t fault_addr;
+	void *priv;
+	struct kgsl_mmu *mmu;
+};
+
+struct kgsl_mmu;
+
+struct kgsl_mmu_ops {
+	int (*probe)(struct kgsl_device *device);
+	int (*mmu_init)(struct kgsl_mmu *mmu);
+	void (*mmu_close)(struct kgsl_mmu *mmu);
+	int (*mmu_start)(struct kgsl_mmu *mmu);
+	void (*mmu_stop)(struct kgsl_mmu *mmu);
+	int (*mmu_set_pt)(struct kgsl_mmu *mmu, struct kgsl_pagetable *pt);
+	uint64_t (*mmu_get_current_ttbr0)(struct kgsl_mmu *mmu);
+	void (*mmu_pagefault_resume)(struct kgsl_mmu *mmu);
+	void (*mmu_clear_fsr)(struct kgsl_mmu *mmu);
+	void (*mmu_enable_clk)(struct kgsl_mmu *mmu);
+	void (*mmu_disable_clk)(struct kgsl_mmu *mmu);
+	unsigned int (*mmu_get_reg_ahbaddr)(struct kgsl_mmu *mmu,
+			int ctx_id, unsigned int reg);
+	bool (*mmu_pt_equal)(struct kgsl_mmu *mmu,
+			struct kgsl_pagetable *pt, u64 ttbr0);
+	int (*mmu_set_pf_policy)(struct kgsl_mmu *mmu, unsigned long pf_policy);
+	struct kgsl_protected_registers *(*mmu_get_prot_regs)
+			(struct kgsl_mmu *mmu);
+	int (*mmu_init_pt)(struct kgsl_mmu *mmu, struct kgsl_pagetable *);
+	void (*mmu_add_global)(struct kgsl_mmu *mmu,
+			struct kgsl_memdesc *memdesc, const char *name);
+	void (*mmu_remove_global)(struct kgsl_mmu *mmu,
+			struct kgsl_memdesc *memdesc);
+	struct kgsl_pagetable * (*mmu_getpagetable)(struct kgsl_mmu *mmu,
+			unsigned long name);
+	struct kgsl_memdesc* (*mmu_get_qdss_global_entry)(void);
+};
+
+struct kgsl_mmu_pt_ops {
+	int (*mmu_map)(struct kgsl_pagetable *pt,
+			struct kgsl_memdesc *memdesc);
+	int (*mmu_unmap)(struct kgsl_pagetable *pt,
+			struct kgsl_memdesc *memdesc);
+	void (*mmu_destroy_pagetable)(struct kgsl_pagetable *);
+	u64 (*get_ttbr0)(struct kgsl_pagetable *);
+	u32 (*get_contextidr)(struct kgsl_pagetable *);
+	int (*get_gpuaddr)(struct kgsl_pagetable *, struct kgsl_memdesc *);
+	void (*put_gpuaddr)(struct kgsl_memdesc *);
+	uint64_t (*find_svm_region)(struct kgsl_pagetable *, uint64_t, uint64_t,
+		uint64_t, uint64_t);
+	int (*set_svm_region)(struct kgsl_pagetable *, uint64_t, uint64_t);
+	int (*svm_range)(struct kgsl_pagetable *, uint64_t *, uint64_t *,
+			uint64_t);
+	bool (*addr_in_range)(struct kgsl_pagetable *pagetable, uint64_t);
+	int (*mmu_map_offset)(struct kgsl_pagetable *pt,
+			uint64_t virtaddr, uint64_t virtoffset,
+			struct kgsl_memdesc *memdesc, uint64_t physoffset,
+			uint64_t size, uint64_t flags);
+	int (*mmu_unmap_offset)(struct kgsl_pagetable *pt,
+			struct kgsl_memdesc *memdesc, uint64_t addr,
+			uint64_t offset, uint64_t size);
+	int (*mmu_sparse_dummy_map)(struct kgsl_pagetable *pt,
+			struct kgsl_memdesc *memdesc, uint64_t offset,
+			uint64_t size);
+};
+
+/*
+ * MMU_FEATURE - return true if the specified feature is supported by the GPU
+ * MMU
+ */
+#define MMU_FEATURE(_mmu, _bit) \
+	((_mmu)->features & (_bit))
+
+/* MMU has register retention */
+#define KGSL_MMU_RETENTION  BIT(1)
+/* MMU requires the TLB to be flushed on map */
+#define KGSL_MMU_FLUSH_TLB_ON_MAP BIT(2)
+/* MMU uses global pagetable */
+#define KGSL_MMU_GLOBAL_PAGETABLE BIT(3)
+/* MMU uses hypervisor for content protection */
+#define KGSL_MMU_HYP_SECURE_ALLOC BIT(4)
+/* Force 32 bit, even if the MMU can do 64 bit */
+#define KGSL_MMU_FORCE_32BIT BIT(5)
+/* 64 bit address is live */
+#define KGSL_MMU_64BIT BIT(6)
+/* The MMU supports non-contiguous pages */
+#define KGSL_MMU_PAGED BIT(8)
+/* The device requires a guard page */
+#define KGSL_MMU_NEED_GUARD_PAGE BIT(9)
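+
+/*
+ * Illustrative check of a feature bit with MMU_FEATURE() (a sketch
+ * only; "mmu" and "size" are assumed locals in the caller):
+ *
+ *	if (MMU_FEATURE(mmu, KGSL_MMU_NEED_GUARD_PAGE))
+ *		size += PAGE_SIZE;
+ */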
+
+/**
+ * struct kgsl_mmu - Master definition for KGSL MMU devices
+ * @flags: MMU device flags
+ * @type: Type of MMU that is attached
+ * @defaultpagetable: Default pagetable object for the MMU
+ * @securepagetable: Default secure pagetable object for the MMU
+ * @mmu_ops: Function pointers for the MMU sub-type
+ * @secured: True if the MMU needs to be secured
+ * @features: Static list of MMU features
+ * @secure_align_mask: Mask that secure buffers need to be aligned to
+ * @priv: Union of sub-device specific members
+ */
+struct kgsl_mmu {
+	unsigned long flags;
+	enum kgsl_mmutype type;
+	struct kgsl_pagetable *defaultpagetable;
+	struct kgsl_pagetable *securepagetable;
+	const struct kgsl_mmu_ops *mmu_ops;
+	bool secured;
+	unsigned long features;
+	unsigned int secure_align_mask;
+	union {
+		struct kgsl_iommu iommu;
+	} priv;
+};
+
+#define KGSL_IOMMU_PRIV(_device) (&((_device)->mmu.priv.iommu))
+
+extern struct kgsl_mmu_ops kgsl_iommu_ops;
+
+int kgsl_mmu_probe(struct kgsl_device *device, char *name);
+int kgsl_mmu_start(struct kgsl_device *device);
+struct kgsl_pagetable *kgsl_mmu_getpagetable_ptbase(struct kgsl_mmu *mmu,
+						u64 ptbase);
+
+void kgsl_add_global_secure_entry(struct kgsl_device *device,
+					struct kgsl_memdesc *memdesc);
+void kgsl_print_global_pt_entries(struct seq_file *s);
+void kgsl_mmu_putpagetable(struct kgsl_pagetable *pagetable);
+
+int kgsl_mmu_get_gpuaddr(struct kgsl_pagetable *pagetable,
+		 struct kgsl_memdesc *memdesc);
+int kgsl_mmu_map(struct kgsl_pagetable *pagetable,
+		 struct kgsl_memdesc *memdesc);
+int kgsl_mmu_unmap(struct kgsl_pagetable *pagetable,
+		    struct kgsl_memdesc *memdesc);
+void kgsl_mmu_put_gpuaddr(struct kgsl_memdesc *memdesc);
+unsigned int kgsl_virtaddr_to_physaddr(void *virtaddr);
+unsigned int kgsl_mmu_log_fault_addr(struct kgsl_mmu *mmu,
+		u64 ttbr0, uint64_t addr);
+enum kgsl_mmutype kgsl_mmu_get_mmutype(struct kgsl_device *device);
+bool kgsl_mmu_gpuaddr_in_range(struct kgsl_pagetable *pt, uint64_t gpuaddr);
+
+int kgsl_mmu_get_region(struct kgsl_pagetable *pagetable,
+		uint64_t gpuaddr, uint64_t size);
+
+int kgsl_mmu_find_region(struct kgsl_pagetable *pagetable,
+		uint64_t region_start, uint64_t region_end,
+		uint64_t *gpuaddr, uint64_t size, unsigned int align);
+
+void kgsl_mmu_add_global(struct kgsl_device *device,
+	struct kgsl_memdesc *memdesc, const char *name);
+void kgsl_mmu_remove_global(struct kgsl_device *device,
+		struct kgsl_memdesc *memdesc);
+
+struct kgsl_pagetable *kgsl_mmu_get_pt_from_ptname(struct kgsl_mmu *mmu,
+							int ptname);
+void kgsl_mmu_close(struct kgsl_device *device);
+
+uint64_t kgsl_mmu_find_svm_region(struct kgsl_pagetable *pagetable,
+		uint64_t start, uint64_t end, uint64_t size,
+		uint64_t alignment);
+
+int kgsl_mmu_set_svm_region(struct kgsl_pagetable *pagetable, uint64_t gpuaddr,
+		uint64_t size);
+
+void kgsl_mmu_detach_pagetable(struct kgsl_pagetable *pagetable);
+
+int kgsl_mmu_svm_range(struct kgsl_pagetable *pagetable,
+		uint64_t *lo, uint64_t *hi, uint64_t memflags);
+
+struct kgsl_pagetable *kgsl_get_pagetable(unsigned long name);
+
+struct kgsl_pagetable *
+kgsl_mmu_createpagetableobject(struct kgsl_mmu *mmu, unsigned int name);
+
+int kgsl_mmu_map_offset(struct kgsl_pagetable *pagetable,
+		uint64_t virtaddr, uint64_t virtoffset,
+		struct kgsl_memdesc *memdesc, uint64_t physoffset,
+		uint64_t size, uint64_t flags);
+int kgsl_mmu_unmap_offset(struct kgsl_pagetable *pagetable,
+		struct kgsl_memdesc *memdesc, uint64_t addr, uint64_t offset,
+		uint64_t size);
+
+struct kgsl_memdesc *kgsl_mmu_get_qdss_global_entry(struct kgsl_device *device);
+
+int kgsl_mmu_sparse_dummy_map(struct kgsl_pagetable *pagetable,
+		struct kgsl_memdesc *memdesc, uint64_t offset, uint64_t size);
+
+/*
+ * Static inline functions of the MMU that simply call the MMU-type
+ * specific implementation through a function pointer. These functions
+ * can be thought of as wrappers around the actual function.
+ */
+
+#define MMU_OP_VALID(_mmu, _field) \
+	(((_mmu) != NULL) && \
+	 ((_mmu)->mmu_ops != NULL) && \
+	 ((_mmu)->mmu_ops->_field != NULL))
+
+#define PT_OP_VALID(_pt, _field) \
+	(((_pt) != NULL) && \
+	 ((_pt)->pt_ops != NULL) && \
+	 ((_pt)->pt_ops->_field != NULL))
+
+static inline u64 kgsl_mmu_get_current_ttbr0(struct kgsl_mmu *mmu)
+{
+	if (MMU_OP_VALID(mmu, mmu_get_current_ttbr0))
+		return mmu->mmu_ops->mmu_get_current_ttbr0(mmu);
+
+	return 0;
+}
+
+static inline struct kgsl_pagetable *kgsl_mmu_getpagetable(struct kgsl_mmu *mmu,
+		unsigned long name)
+{
+	if (MMU_OP_VALID(mmu, mmu_getpagetable))
+		return mmu->mmu_ops->mmu_getpagetable(mmu, name);
+
+	return NULL;
+}
+
+static inline int kgsl_mmu_set_pt(struct kgsl_mmu *mmu,
+					struct kgsl_pagetable *pagetable)
+{
+	if (MMU_OP_VALID(mmu, mmu_set_pt))
+		return mmu->mmu_ops->mmu_set_pt(mmu, pagetable);
+
+	return 0;
+}
+
+static inline void kgsl_mmu_stop(struct kgsl_mmu *mmu)
+{
+	if (MMU_OP_VALID(mmu, mmu_stop))
+		mmu->mmu_ops->mmu_stop(mmu);
+}
+
+static inline bool kgsl_mmu_pt_equal(struct kgsl_mmu *mmu,
+			struct kgsl_pagetable *pt, u64 ttbr0)
+{
+	if (MMU_OP_VALID(mmu, mmu_pt_equal))
+		return mmu->mmu_ops->mmu_pt_equal(mmu, pt, ttbr0);
+
+	return false;
+}
+
+static inline void kgsl_mmu_enable_clk(struct kgsl_mmu *mmu)
+{
+	if (MMU_OP_VALID(mmu, mmu_enable_clk))
+		mmu->mmu_ops->mmu_enable_clk(mmu);
+}
+
+static inline void kgsl_mmu_disable_clk(struct kgsl_mmu *mmu)
+{
+	if (MMU_OP_VALID(mmu, mmu_disable_clk))
+		mmu->mmu_ops->mmu_disable_clk(mmu);
+}
+
+/*
+ * kgsl_mmu_get_reg_ahbaddr() - Calls the mmu specific function pointer to
+ * return the address that the GPU can use to access a register
+ * @mmu:		Pointer to the device mmu
+ * @ctx_id:		The MMU HW context ID
+ * @reg:		Register whose address is to be returned
+ *
+ * Returns the AHB address of reg, or 0 if unavailable
+ */
+static inline unsigned int kgsl_mmu_get_reg_ahbaddr(struct kgsl_mmu *mmu,
+				int ctx_id, unsigned int reg)
+{
+	if (MMU_OP_VALID(mmu, mmu_get_reg_ahbaddr))
+		return mmu->mmu_ops->mmu_get_reg_ahbaddr(mmu, ctx_id, reg);
+
+	return 0;
+}
+
+static inline int kgsl_mmu_set_pagefault_policy(struct kgsl_mmu *mmu,
+						unsigned long pf_policy)
+{
+	if (MMU_OP_VALID(mmu, mmu_set_pf_policy))
+		return mmu->mmu_ops->mmu_set_pf_policy(mmu, pf_policy);
+
+	return 0;
+}
+
+static inline void kgsl_mmu_pagefault_resume(struct kgsl_mmu *mmu)
+{
+	if (MMU_OP_VALID(mmu, mmu_pagefault_resume))
+		return mmu->mmu_ops->mmu_pagefault_resume(mmu);
+}
+
+static inline void kgsl_mmu_clear_fsr(struct kgsl_mmu *mmu)
+{
+	if (MMU_OP_VALID(mmu, mmu_clear_fsr))
+		return mmu->mmu_ops->mmu_clear_fsr(mmu);
+}
+
+static inline struct kgsl_protected_registers *kgsl_mmu_get_prot_regs
+						(struct kgsl_mmu *mmu)
+{
+	if (MMU_OP_VALID(mmu, mmu_get_prot_regs))
+		return mmu->mmu_ops->mmu_get_prot_regs(mmu);
+
+	return NULL;
+}
+
+static inline int kgsl_mmu_is_perprocess(struct kgsl_mmu *mmu)
+{
+	return MMU_FEATURE(mmu, KGSL_MMU_GLOBAL_PAGETABLE) ? 0 : 1;
+}
+
+static inline int kgsl_mmu_use_cpu_map(struct kgsl_mmu *mmu)
+{
+	return kgsl_mmu_is_perprocess(mmu);
+}
+
+static inline int kgsl_mmu_is_secured(struct kgsl_mmu *mmu)
+{
+	return mmu && (mmu->secured) && (mmu->securepagetable);
+}
+
+static inline u64
+kgsl_mmu_pagetable_get_ttbr0(struct kgsl_pagetable *pagetable)
+{
+	if (PT_OP_VALID(pagetable, get_ttbr0))
+		return pagetable->pt_ops->get_ttbr0(pagetable);
+
+	return 0;
+}
+
+static inline u32
+kgsl_mmu_pagetable_get_contextidr(struct kgsl_pagetable *pagetable)
+{
+	if (PT_OP_VALID(pagetable, get_contextidr))
+		return pagetable->pt_ops->get_contextidr(pagetable);
+
+	return 0;
+}
+
+#ifdef CONFIG_QCOM_IOMMU
+#include <linux/qcom_iommu.h>
+#ifndef CONFIG_ARM_SMMU
+static inline bool kgsl_mmu_bus_secured(struct device *dev)
+{
+	struct bus_type *bus = msm_iommu_get_bus(dev);
+
+	return (bus == &msm_iommu_sec_bus_type) ? true : false;
+}
+#else
+static inline bool kgsl_mmu_bus_secured(struct device *dev)
+{
+	/* ARM driver contains all context banks on single bus */
+	return true;
+}
+#endif /* CONFIG_ARM_SMMU */
+static inline struct bus_type *kgsl_mmu_get_bus(struct device *dev)
+{
+	return msm_iommu_get_bus(dev);
+}
+static inline struct device *kgsl_mmu_get_ctx(const char *name)
+{
+	return msm_iommu_get_ctx(name);
+}
+#else
+static inline bool kgsl_mmu_bus_secured(struct device *dev)
+{
+	/*ARM driver contains all context banks on single bus */
+	return true;
+}
+
+static inline struct bus_type *kgsl_mmu_get_bus(struct device *dev)
+{
+	return &platform_bus_type;
+}
+static inline struct device *kgsl_mmu_get_ctx(const char *name)
+{
+	return ERR_PTR(-ENODEV);
+}
+#endif
+
+#endif /* __KGSL_MMU_H */
diff --git a/drivers/gpu/msm/kgsl_pool.c b/drivers/gpu/msm/kgsl_pool.c
new file mode 100644
index 0000000..6ecbab4
--- /dev/null
+++ b/drivers/gpu/msm/kgsl_pool.c
@@ -0,0 +1,509 @@
+/* Copyright (c) 2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/vmalloc.h>
+#include <asm/cacheflush.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
+#include <linux/version.h>
+
+#include "kgsl.h"
+#include "kgsl_device.h"
+#include "kgsl_pool.h"
+
+#define KGSL_MAX_POOLS 4
+#define KGSL_MAX_POOL_ORDER 8
+#define KGSL_MAX_RESERVED_PAGES 4096
+
+/**
+ * struct kgsl_page_pool - Structure to hold information for the pool
+ * @pool_order: Page order describing the size of the page
+ * @page_count: Number of pages currently present in the pool
+ * @reserved_pages: Number of pages reserved at init for the pool
+ * @allocation_allowed: True if the pool may fall back to system memory
+ * allocation once its reserved pages are exhausted
+ * @list_lock: Spinlock for page list in the pool
+ * @page_list: List of pages held/reserved in this pool
+ */
+struct kgsl_page_pool {
+	unsigned int pool_order;
+	int page_count;
+	unsigned int reserved_pages;
+	bool allocation_allowed;
+	spinlock_t list_lock;
+	struct list_head page_list;
+};
+
+static struct kgsl_page_pool kgsl_pools[KGSL_MAX_POOLS];
+static int kgsl_num_pools;
+static int kgsl_pool_max_pages;
+
+
+/* Returns the KGSL pool corresponding to the input page order */
+static struct kgsl_page_pool *
+_kgsl_get_pool_from_order(unsigned int order)
+{
+	int i;
+
+	for (i = 0; i < kgsl_num_pools; i++) {
+		if (kgsl_pools[i].pool_order == order)
+			return &kgsl_pools[i];
+	}
+
+	return NULL;
+}
+
+/* Map the page into kernel and zero it out */
+static void
+_kgsl_pool_zero_page(struct page *p, unsigned int pool_order)
+{
+	int i;
+
+	for (i = 0; i < (1 << pool_order); i++) {
+		struct page *page = nth_page(p, i);
+		void *addr = kmap_atomic(page);
+
+		memset(addr, 0, PAGE_SIZE);
+		dmac_flush_range(addr, addr + PAGE_SIZE);
+		kunmap_atomic(addr);
+	}
+}
+
+/* Add a page to specified pool */
+static void
+_kgsl_pool_add_page(struct kgsl_page_pool *pool, struct page *p)
+{
+	_kgsl_pool_zero_page(p, pool->pool_order);
+
+	spin_lock(&pool->list_lock);
+	list_add_tail(&p->lru, &pool->page_list);
+	pool->page_count++;
+	spin_unlock(&pool->list_lock);
+}
+
+/* Returns a page from specified pool */
+static struct page *
+_kgsl_pool_get_page(struct kgsl_page_pool *pool)
+{
+	struct page *p = NULL;
+
+	spin_lock(&pool->list_lock);
+	if (pool->page_count) {
+		p = list_first_entry(&pool->page_list, struct page, lru);
+		pool->page_count--;
+		list_del(&p->lru);
+	}
+	spin_unlock(&pool->list_lock);
+
+	return p;
+}
+
+/* Returns the number of pages in specified pool */
+static int
+kgsl_pool_size(struct kgsl_page_pool *kgsl_pool)
+{
+	int size;
+
+	spin_lock(&kgsl_pool->list_lock);
+	size = kgsl_pool->page_count * (1 << kgsl_pool->pool_order);
+	spin_unlock(&kgsl_pool->list_lock);
+
+	return size;
+}
+
+/* Returns the number of pages in all kgsl page pools */
+static int kgsl_pool_size_total(void)
+{
+	int i;
+	int total = 0;
+
+	for (i = 0; i < kgsl_num_pools; i++)
+		total += kgsl_pool_size(&kgsl_pools[i]);
+	return total;
+}
+
+/*
+ * This will shrink the specified pool by num_pages or its pool_size,
+ * whichever is smaller.
+ */
+static unsigned int
+_kgsl_pool_shrink(struct kgsl_page_pool *pool, int num_pages)
+{
+	int j;
+	unsigned int pcount = 0;
+
+	if (pool == NULL || num_pages <= 0)
+		return pcount;
+
+	for (j = 0; j < num_pages >> pool->pool_order; j++) {
+		struct page *page = _kgsl_pool_get_page(pool);
+
+		if (page != NULL) {
+			__free_pages(page, pool->pool_order);
+			pcount += (1 << pool->pool_order);
+		} else {
+			/* Break as this pool is empty */
+			break;
+		}
+	}
+
+	return pcount;
+}
+
+/*
+ * This function reduces the total pool size to the number of
+ * pages specified by target_pages.
+ *
+ * If target_pages is greater than the current pool size, nothing
+ * needs to be done; otherwise remove (current_pool_size - target_pages)
+ * pages from the pools, starting with the highest order pool.
+ */
+static unsigned long
+kgsl_pool_reduce(unsigned int target_pages, bool exit)
+{
+	int total_pages = 0;
+	int i;
+	int nr_removed;
+	struct kgsl_page_pool *pool;
+	unsigned long pcount = 0;
+
+	total_pages = kgsl_pool_size_total();
+
+	for (i = (kgsl_num_pools - 1); i >= 0; i--) {
+		pool = &kgsl_pools[i];
+
+		/*
+		 * Only reduce the pool sizes for pools which are allowed to
+		 * allocate memory unless we are at close, in which case the
+		 * reserved memory for all pools needs to be freed
+		 */
+		if (!pool->allocation_allowed && !exit)
+			continue;
+
+		total_pages -= pcount;
+
+		nr_removed = total_pages - target_pages;
+		if (nr_removed <= 0)
+			return pcount;
+
+		/* Round up to integral number of pages in this pool */
+		nr_removed = ALIGN(nr_removed, 1 << pool->pool_order);
+
+		/* Remove nr_removed pages from this pool */
+		pcount += _kgsl_pool_shrink(pool, nr_removed);
+	}
+
+	return pcount;
+}
+
+/**
+ * kgsl_pool_free_sgt() - Free scatter-gather list
+ * @sgt: pointer to the sg table
+ *
+ * Free the pages backing the sg list, walking each entry since
+ * physically adjacent pages may have been collapsed into a single
+ * scatterlist entry. Pages are added back to the pool if it has
+ * sufficient space, otherwise they are returned to the system.
+ */
+
+void kgsl_pool_free_sgt(struct sg_table *sgt)
+{
+	int i;
+	struct scatterlist *sg;
+
+	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
+		/*
+		 * sg_alloc_table_from_pages() will collapse any physically
+		 * adjacent pages into a single scatterlist entry. We cannot
+		 * just call __free_pages() on the entire set since we cannot
+		 * ensure that the size is a whole order. Instead, free each
+		 * page or compound page group individually.
+		 */
+		struct page *p = sg_page(sg), *next;
+		unsigned int count;
+		unsigned int j = 0;
+
+		while (j < (sg->length/PAGE_SIZE)) {
+			count = 1 << compound_order(p);
+			next = nth_page(p, count);
+			kgsl_pool_free_page(p);
+
+			p = next;
+			j += count;
+		}
+	}
+}
+
+/**
+ * kgsl_pool_free_pages() - Free pages in the pages array
+ * @pages: pointer to the pages array
+ * @pcount: number of pages in the array
+ *
+ * Free the pages in the array, stepping over compound page groups.
+ * Pages are added back to the pool if it has sufficient space,
+ * otherwise they are returned to the system.
+ */
+void kgsl_pool_free_pages(struct page **pages, unsigned int pcount)
+{
+	int i;
+
+	if (pages == NULL || pcount == 0)
+		return;
+
+	for (i = 0; i < pcount;) {
+		/*
+		 * Free each page or compound page group individually.
+		 */
+		struct page *p = pages[i];
+
+		i += 1 << compound_order(p);
+		kgsl_pool_free_page(p);
+	}
+}
+static int kgsl_pool_idx_lookup(unsigned int order)
+{
+	int i;
+
+	for (i = 0; i < kgsl_num_pools; i++)
+		if (order == kgsl_pools[i].pool_order)
+			return i;
+
+	return -ENOMEM;
+}
+
+/**
+ * kgsl_pool_alloc_page() - Allocate a page of requested size
+ * @page_size: Size of the page to be allocated
+ * @pages: pointer to hold list of pages, should be big enough to hold
+ * requested page
+ * @pages_len: Length of the pages array
+ * @align: Requested alignment; on an -EAGAIN return it is updated along
+ * with *page_size for the retry
+ *
+ * Return total page count on success and negative value on failure
+ */
+int kgsl_pool_alloc_page(int *page_size, struct page **pages,
+			unsigned int pages_len, unsigned int *align)
+{
+	int j;
+	int pcount = 0;
+	struct kgsl_page_pool *pool;
+	struct page *page = NULL;
+	struct page *p = NULL;
+	int order = get_order(*page_size);
+	int pool_idx;
+
+	if ((pages == NULL) || pages_len < (*page_size >> PAGE_SHIFT))
+		return -EINVAL;
+
+	pool = _kgsl_get_pool_from_order(order);
+	if (pool == NULL)
+		return -EINVAL;
+
+	pool_idx = kgsl_pool_idx_lookup(order);
+	page = _kgsl_pool_get_page(pool);
+
+	/* Allocate a new page if not allocated from pool */
+	if (page == NULL) {
+		gfp_t gfp_mask = kgsl_gfp_mask(order);
+
+		/* Only allocate non-reserved memory for certain pools */
+		if (!pool->allocation_allowed && pool_idx > 0) {
+			*page_size = PAGE_SIZE <<
+					kgsl_pools[pool_idx-1].pool_order;
+			*align = ilog2(*page_size);
+			return -EAGAIN;
+		}
+
+		page = alloc_pages(gfp_mask, order);
+
+		if (!page) {
+			if (pool_idx > 0) {
+				/* Retry with lower order pages */
+				*page_size = PAGE_SIZE <<
+					kgsl_pools[pool_idx-1].pool_order;
+				*align = ilog2(*page_size);
+				return -EAGAIN;
+			} else
+				return -ENOMEM;
+		}
+
+		_kgsl_pool_zero_page(page, order);
+	}
+
+	for (j = 0; j < (*page_size >> PAGE_SHIFT); j++) {
+		p = nth_page(page, j);
+		pages[pcount] = p;
+		pcount++;
+	}
+
+	return pcount;
+}
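+
+/*
+ * Illustrative caller pattern for kgsl_pool_alloc_page() (a sketch;
+ * "page_size", "pages", "len" and "align" are assumed to be set up by
+ * the caller). On -EAGAIN the function has already lowered *page_size
+ * (and *align) to the next smaller pool order, so the call is simply
+ * retried with the updated size:
+ *
+ *	int count;
+ *
+ *	do {
+ *		count = kgsl_pool_alloc_page(&page_size, pages, len, &align);
+ *	} while (count == -EAGAIN);
+ */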
+
+void kgsl_pool_free_page(struct page *page)
+{
+	struct kgsl_page_pool *pool;
+	int page_order;
+
+	if (page == NULL)
+		return;
+
+	page_order = compound_order(page);
+
+	if (!kgsl_pool_max_pages ||
+			(kgsl_pool_size_total() < kgsl_pool_max_pages)) {
+		pool = _kgsl_get_pool_from_order(page_order);
+		if (pool != NULL) {
+			_kgsl_pool_add_page(pool, page);
+			return;
+		}
+	}
+
+	/* Give back to system as not added to pool */
+	__free_pages(page, page_order);
+}
+
+static void kgsl_pool_reserve_pages(void)
+{
+	int i, j;
+
+	for (i = 0; i < kgsl_num_pools; i++) {
+		struct page *page;
+
+		for (j = 0; j < kgsl_pools[i].reserved_pages; j++) {
+			int order = kgsl_pools[i].pool_order;
+			gfp_t gfp_mask = kgsl_gfp_mask(order);
+
+			page = alloc_pages(gfp_mask, order);
+			if (page != NULL)
+				_kgsl_pool_add_page(&kgsl_pools[i], page);
+		}
+	}
+}
+
+/* Functions for the shrinker */
+
+static unsigned long
+kgsl_pool_shrink_scan_objects(struct shrinker *shrinker,
+					struct shrink_control *sc)
+{
+	/* nr represents the number of pages to be removed */
+	int nr = sc->nr_to_scan;
+	int total_pages = kgsl_pool_size_total();
+
+	/* target_pages represents the new pool size */
+	int target_pages = (nr > total_pages) ? 0 : (total_pages - nr);
+
+	/* Reduce pool size to target_pages */
+	return kgsl_pool_reduce(target_pages, false);
+}
+
+static unsigned long
+kgsl_pool_shrink_count_objects(struct shrinker *shrinker,
+					struct shrink_control *sc)
+{
+	/* Return total pool size as everything in pool can be freed */
+	return kgsl_pool_size_total();
+}
+
+/* Shrinker callback data */
+static struct shrinker kgsl_pool_shrinker = {
+	.count_objects = kgsl_pool_shrink_count_objects,
+	.scan_objects = kgsl_pool_shrink_scan_objects,
+	.seeks = DEFAULT_SEEKS,
+	.batch = 0,
+};
+
+static void kgsl_pool_config(unsigned int order, unsigned int reserved_pages,
+		bool allocation_allowed)
+{
+#ifdef CONFIG_ALLOC_BUFFERS_IN_4K_CHUNKS
+	if (order > 0) {
+		pr_info("%s: Pool order:%d not supported\n", __func__, order);
+		return;
+	}
+#endif
+	if ((order > KGSL_MAX_POOL_ORDER) ||
+			(reserved_pages > KGSL_MAX_RESERVED_PAGES))
+		return;
+
+	kgsl_pools[kgsl_num_pools].pool_order = order;
+	kgsl_pools[kgsl_num_pools].reserved_pages = reserved_pages;
+	kgsl_pools[kgsl_num_pools].allocation_allowed = allocation_allowed;
+	spin_lock_init(&kgsl_pools[kgsl_num_pools].list_lock);
+	INIT_LIST_HEAD(&kgsl_pools[kgsl_num_pools].page_list);
+	kgsl_num_pools++;
+}
+
+static void kgsl_of_parse_mempools(struct device_node *node)
+{
+	struct device_node *child;
+	unsigned int page_size, reserved_pages = 0;
+	bool allocation_allowed;
+
+	for_each_child_of_node(node, child) {
+		unsigned int index;
+
+		if (of_property_read_u32(child, "reg", &index))
+			return;
+
+		if (index >= KGSL_MAX_POOLS)
+			continue;
+
+		if (of_property_read_u32(child, "qcom,mempool-page-size",
+					&page_size))
+			return;
+
+		of_property_read_u32(child, "qcom,mempool-reserved",
+				&reserved_pages);
+
+		allocation_allowed = of_property_read_bool(child,
+				"qcom,mempool-allocate");
+
+		kgsl_pool_config(ilog2(page_size >> PAGE_SHIFT), reserved_pages,
+				allocation_allowed);
+	}
+}
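+
+/*
+ * Illustrative (non-authoritative) devicetree sketch of the mempool
+ * layout parsed above; property names match the code, while sizes and
+ * counts are placeholders:
+ *
+ *	qcom,gpu-mempools {
+ *		compatible = "qcom,gpu-mempools";
+ *		qcom,mempool-max-pages = <32768>;
+ *
+ *		qcom,gpu-mempool@0 {
+ *			reg = <0>;
+ *			qcom,mempool-page-size = <4096>;
+ *			qcom,mempool-reserved = <2048>;
+ *			qcom,mempool-allocate;
+ *		};
+ *	};
+ */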
+
+static void kgsl_of_get_mempools(struct device_node *parent)
+{
+	struct device_node *node;
+
+	node = of_find_compatible_node(parent, NULL, "qcom,gpu-mempools");
+	if (node != NULL) {
+		/* Get Max pages limit for mempool */
+		of_property_read_u32(node, "qcom,mempool-max-pages",
+				&kgsl_pool_max_pages);
+		kgsl_of_parse_mempools(node);
+	}
+}
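+
+/*
+ * A hypothetical device-tree layout that the two parsers above would
+ * accept.  The node and property names match the code; the sizes and
+ * counts are purely illustrative.
+ *
+ *	qcom,gpu-mempools {
+ *		compatible = "qcom,gpu-mempools";
+ *		qcom,mempool-max-pages = <32768>;
+ *
+ *		gpu-mempool@0 {
+ *			reg = <0>;
+ *			qcom,mempool-page-size = <4096>;
+ *			qcom,mempool-reserved = <2048>;
+ *			qcom,mempool-allocate;
+ *		};
+ *
+ *		gpu-mempool@1 {
+ *			reg = <1>;
+ *			qcom,mempool-page-size = <65536>;
+ *			qcom,mempool-reserved = <32>;
+ *		};
+ *	};
+ *
+ * kgsl_pool_config() converts each page size to an order with
+ * ilog2(page_size >> PAGE_SHIFT), so on a 4K-page kernel 4096 maps to
+ * order 0 and 65536 to order 4.
+ */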
+
+void kgsl_init_page_pools(struct platform_device *pdev)
+{
+	/* Get GPU mempools data and configure pools */
+	kgsl_of_get_mempools(pdev->dev.of_node);
+
+	/* Reserve the appropriate number of pages for each pool */
+	kgsl_pool_reserve_pages();
+
+	/* Initialize shrinker */
+	register_shrinker(&kgsl_pool_shrinker);
+}
+
+void kgsl_exit_page_pools(void)
+{
+	/* Release all pages in pools, if any. */
+	kgsl_pool_reduce(0, true);
+
+	/* Unregister shrinker */
+	unregister_shrinker(&kgsl_pool_shrinker);
+}
+
diff --git a/drivers/gpu/msm/kgsl_pool.h b/drivers/gpu/msm/kgsl_pool.h
new file mode 100644
index 0000000..d55e1ad
--- /dev/null
+++ b/drivers/gpu/msm/kgsl_pool.h
@@ -0,0 +1,44 @@
+/* Copyright (c) 2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef __KGSL_POOL_H
+#define __KGSL_POOL_H
+
+#include <linux/mm_types.h>
+#include "kgsl_sharedmem.h"
+
+static inline unsigned int
+kgsl_gfp_mask(unsigned int page_order)
+{
+	unsigned int gfp_mask = __GFP_HIGHMEM;
+
+	if (page_order > 0) {
+		gfp_mask |= __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN;
+		gfp_mask &= ~__GFP_RECLAIM;
+	} else
+		gfp_mask |= GFP_KERNEL;
+
+	if (kgsl_sharedmem_get_noretry() == true)
+		gfp_mask |= __GFP_NORETRY | __GFP_NOWARN;
+
+	return gfp_mask;
+}
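+
+/*
+ * For illustration: with the selection above, an order-0 request resolves
+ * to (GFP_KERNEL | __GFP_HIGHMEM), while a higher-order request resolves
+ * to (__GFP_HIGHMEM | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN) with
+ * __GFP_RECLAIM explicitly cleared, so large allocations fail fast rather
+ * than stall in reclaim.  If the global "noretry" option is set, order-0
+ * allocations also pick up __GFP_NORETRY | __GFP_NOWARN.
+ */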
+
+void kgsl_pool_free_sgt(struct sg_table *sgt);
+void kgsl_pool_free_pages(struct page **pages, unsigned int page_count);
+void kgsl_init_page_pools(struct platform_device *pdev);
+void kgsl_exit_page_pools(void);
+int kgsl_pool_alloc_page(int *page_size, struct page **pages,
+			unsigned int pages_len, unsigned int *align);
+void kgsl_pool_free_page(struct page *p);
+#endif /* __KGSL_POOL_H */
+
diff --git a/drivers/gpu/msm/kgsl_pwrctrl.c b/drivers/gpu/msm/kgsl_pwrctrl.c
new file mode 100644
index 0000000..c43c210
--- /dev/null
+++ b/drivers/gpu/msm/kgsl_pwrctrl.c
@@ -0,0 +1,2992 @@
+/* Copyright (c) 2010-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/export.h>
+#include <linux/interrupt.h>
+#include <asm/page.h>
+#include <linux/pm_runtime.h>
+#include <linux/msm-bus.h>
+#include <linux/msm-bus-board.h>
+#include <linux/ktime.h>
+#include <linux/delay.h>
+#include <linux/msm_adreno_devfreq.h>
+#include <linux/of_device.h>
+#include <linux/thermal.h>
+
+#include "kgsl.h"
+#include "kgsl_pwrscale.h"
+#include "kgsl_device.h"
+#include "kgsl_trace.h"
+
+#define KGSL_PWRFLAGS_POWER_ON 0
+#define KGSL_PWRFLAGS_CLK_ON   1
+#define KGSL_PWRFLAGS_AXI_ON   2
+#define KGSL_PWRFLAGS_IRQ_ON   3
+#define KGSL_PWRFLAGS_NAP_OFF  5
+
+#define UPDATE_BUSY_VAL		1000000
+
+/* Number of jiffies for a full thermal cycle */
+#define TH_HZ			(HZ/5)
+
+#define KGSL_MAX_BUSLEVELS	20
+
+#define DEFAULT_BUS_P 25
+
+/*
+ * The effective duration of the qos request, in usecs.  After the
+ * timeout, the qos request is cancelled automatically.  The 80 ms
+ * default matches the default GPU idle time.
+ */
+#define KGSL_L2PC_CPU_TIMEOUT	(80 * 1000)
+
+/*
+ * Order matters here: the driver indexes grp_clks[] by position in this
+ * table.  New entries go on the end.
+ */
+static const char * const clocks[] = {
+	"src_clk",
+	"core_clk",
+	"iface_clk",
+	"mem_clk",
+	"mem_iface_clk",
+	"alt_mem_iface_clk",
+	"rbbmtimer_clk",
+	"gtcu_clk",
+	"gtbu_clk",
+	"gtcu_iface_clk",
+	"alwayson_clk",
+	"isense_clk",
+	"rbcpr_clk",
+	"iref_clk"
+};
+
+static unsigned int ib_votes[KGSL_MAX_BUSLEVELS];
+static int last_vote_buslevel;
+static int max_vote_buslevel;
+
+static void kgsl_pwrctrl_clk(struct kgsl_device *device, int state,
+					int requested_state);
+static void kgsl_pwrctrl_axi(struct kgsl_device *device, int state);
+static int kgsl_pwrctrl_pwrrail(struct kgsl_device *device, int state);
+static void kgsl_pwrctrl_set_state(struct kgsl_device *device,
+				unsigned int state);
+static void kgsl_pwrctrl_request_state(struct kgsl_device *device,
+				unsigned int state);
+static int _isense_clk_set_rate(struct kgsl_pwrctrl *pwr, int level);
+
+/**
+ * _record_pwrevent() - Record the history of the new event
+ * @device: Pointer to the kgsl_device struct
+ * @t: Timestamp
+ * @event: Event type
+ *
+ * Finish recording the duration of the previous event.  Then update the
+ * index, record the start of the new event, and the relevant data.
+ */
+static void _record_pwrevent(struct kgsl_device *device,
+			ktime_t t, int event) {
+	struct kgsl_pwrscale *psc = &device->pwrscale;
+	struct kgsl_pwr_history *history = &psc->history[event];
+	int i = history->index;
+
+	if (history->events == NULL)
+		return;
+	history->events[i].duration = ktime_us_delta(t,
+					history->events[i].start);
+	i = (i + 1) % history->size;
+	history->index = i;
+	history->events[i].start = t;
+	switch (event) {
+	case KGSL_PWREVENT_STATE:
+		history->events[i].data = device->state;
+		break;
+	case KGSL_PWREVENT_GPU_FREQ:
+		history->events[i].data = device->pwrctrl.active_pwrlevel;
+		break;
+	case KGSL_PWREVENT_BUS_FREQ:
+		history->events[i].data = last_vote_buslevel;
+		break;
+	default:
+		break;
+	}
+}
+
+#ifdef CONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON
+#include <soc/qcom/devfreq_devbw.h>
+
+/**
+ * kgsl_get_bw() - Return latest msm bus IB vote
+ */
+static unsigned int kgsl_get_bw(void)
+{
+	return ib_votes[last_vote_buslevel];
+}
+#endif
+
+/**
+ * _ab_buslevel_update() - Calculate the latest msm bus AB vote
+ * @pwr: Pointer to the kgsl_pwrctrl struct
+ * @ab: Pointer to be updated with the calculated AB vote
+ */
+static void _ab_buslevel_update(struct kgsl_pwrctrl *pwr,
+				unsigned long *ab)
+{
+	unsigned int ib = ib_votes[last_vote_buslevel];
+	unsigned int max_bw = ib_votes[max_vote_buslevel];
+
+	if (!ab)
+		return;
+	if (ib == 0)
+		*ab = 0;
+	else if ((!pwr->bus_percent_ab) && (!pwr->bus_ab_mbytes))
+		*ab = DEFAULT_BUS_P * ib / 100;
+	else if (pwr->bus_width)
+		*ab = pwr->bus_ab_mbytes;
+	else
+		*ab = (pwr->bus_percent_ab * max_bw) / 100;
+
+	if (*ab > ib)
+		*ab = ib;
+}
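+
+/*
+ * Worked example with made-up numbers: with DEFAULT_BUS_P = 25, if the
+ * current IB vote is 4000 and neither bus_percent_ab nor bus_ab_mbytes
+ * is set, the AB vote above comes out as 25 * 4000 / 100 = 1000.  If a
+ * 50 percent AB request is pending and no fixed bus width is configured,
+ * AB becomes 50 percent of the largest IB in the table, and in every
+ * case it is clamped so that AB <= IB.
+ */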
+
+/**
+ * _adjust_pwrlevel() - Given a requested power level do bounds checking on the
+ * constraints and return the nearest possible level
+ * @pwr: Pointer to the kgsl_pwrctrl struct
+ * @level: Requested level
+ * @pwrc: Pointer to the power constraint to be applied
+ * @popp: If set, do not let the adjusted level rise above the currently
+ * active level
+ *
+ * Apply thermal and max/min limits first.  Then force the level with a
+ * constraint if one exists.
+ */
+static unsigned int _adjust_pwrlevel(struct kgsl_pwrctrl *pwr, int level,
+					struct kgsl_pwr_constraint *pwrc,
+					int popp)
+{
+	unsigned int max_pwrlevel = max_t(unsigned int, pwr->thermal_pwrlevel,
+		pwr->max_pwrlevel);
+	unsigned int min_pwrlevel = max_t(unsigned int, pwr->thermal_pwrlevel,
+		pwr->min_pwrlevel);
+
+	switch (pwrc->type) {
+	case KGSL_CONSTRAINT_PWRLEVEL: {
+		switch (pwrc->sub_type) {
+		case KGSL_CONSTRAINT_PWR_MAX:
+			return max_pwrlevel;
+		case KGSL_CONSTRAINT_PWR_MIN:
+			return min_pwrlevel;
+		default:
+			break;
+		}
+	}
+	break;
+	}
+
+	if (popp && (max_pwrlevel < pwr->active_pwrlevel))
+		max_pwrlevel = pwr->active_pwrlevel;
+
+	if (level < max_pwrlevel)
+		return max_pwrlevel;
+	if (level > min_pwrlevel)
+		return min_pwrlevel;
+
+	return level;
+}
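+
+/*
+ * Worked example with a hypothetical five-level table (index 0 fastest):
+ * with max_pwrlevel = 0, min_pwrlevel = 3 and thermal_pwrlevel = 1 the
+ * effective bounds above are max(1, 0) = 1 and max(1, 3) = 3, so a
+ * request for level 0 comes back as 1, a request for level 4 comes back
+ * as 3, and a KGSL_CONSTRAINT_PWR_MAX constraint returns 1 directly.
+ */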
+
+#ifdef CONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON
+static void kgsl_pwrctrl_vbif_update(unsigned long ab)
+{
+	/* ask a governor to vote on behalf of us */
+	devfreq_vbif_update_bw(ib_votes[last_vote_buslevel], ab);
+}
+#else
+static void kgsl_pwrctrl_vbif_update(unsigned long ab)
+{
+}
+#endif
+
+/**
+ * kgsl_pwrctrl_buslevel_update() - Recalculate the bus vote and send it
+ * @device: Pointer to the kgsl_device struct
+ * @on: true to set an active bus vote, false to turn off the vote
+ */
+void kgsl_pwrctrl_buslevel_update(struct kgsl_device *device,
+			bool on)
+{
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	int cur = pwr->pwrlevels[pwr->active_pwrlevel].bus_freq;
+	int buslevel = 0;
+	unsigned long ab;
+
+	/* the bus should be ON to update the active frequency */
+	if (on && !(test_bit(KGSL_PWRFLAGS_AXI_ON, &pwr->power_flags)))
+		return;
+	/*
+	 * If the bus should remain on, calculate our request and submit it;
+	 * otherwise request bus level 0 (off).
+	 */
+	if (on) {
+		buslevel = min_t(int, pwr->pwrlevels[0].bus_max,
+				cur + pwr->bus_mod);
+		buslevel = max_t(int, buslevel, 1);
+	} else {
+		/* If the bus is being turned off, reset to default level */
+		pwr->bus_mod = 0;
+		pwr->bus_percent_ab = 0;
+		pwr->bus_ab_mbytes = 0;
+	}
+	trace_kgsl_buslevel(device, pwr->active_pwrlevel, buslevel);
+	last_vote_buslevel = buslevel;
+
+	/* buslevel is the IB vote, update the AB */
+	_ab_buslevel_update(pwr, &ab);
+
+	/*
+	 * Vote for ocmem if the target supports ocmem scaling;
+	 * shut it down based on the "on" parameter.
+	 */
+	if (pwr->ocmem_pcl)
+		msm_bus_scale_client_update_request(pwr->ocmem_pcl,
+			on ? pwr->active_pwrlevel : pwr->num_pwrlevels - 1);
+
+	/* vote for bus if gpubw-dev support is not enabled */
+	if (pwr->pcl)
+		msm_bus_scale_client_update_request(pwr->pcl, buslevel);
+
+	kgsl_pwrctrl_vbif_update(ab);
+}
+EXPORT_SYMBOL(kgsl_pwrctrl_buslevel_update);
+
+/**
+ * kgsl_pwrctrl_pwrlevel_change_settings() - Program h/w during powerlevel
+ * transitions
+ * @device: Pointer to the kgsl_device struct
+ * @post: true if called after the clk_rate change, false if called before
+ */
+static void kgsl_pwrctrl_pwrlevel_change_settings(struct kgsl_device *device,
+			bool post)
+{
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	unsigned int old = pwr->previous_pwrlevel;
+	unsigned int new = pwr->active_pwrlevel;
+
+	if (device->state != KGSL_STATE_ACTIVE)
+		return;
+	if (old == new)
+		return;
+	if (!device->ftbl->pwrlevel_change_settings)
+		return;
+
+	device->ftbl->pwrlevel_change_settings(device, old, new, post);
+}
+
+/**
+ * kgsl_pwrctrl_set_thermal_cycle() - set the thermal cycle if required
+ * @pwr: Pointer to the kgsl_pwrctrl struct
+ * @new_level: the level to transition to
+ */
+static void kgsl_pwrctrl_set_thermal_cycle(struct kgsl_pwrctrl *pwr,
+						unsigned int new_level)
+{
+	if ((new_level != pwr->thermal_pwrlevel) || !pwr->sysfs_pwr_limit)
+		return;
+	if (pwr->thermal_pwrlevel == pwr->sysfs_pwr_limit->level) {
+		/* Thermal cycle for sysfs pwr limit, start cycling */
+		if (pwr->thermal_cycle == CYCLE_ENABLE) {
+			pwr->thermal_cycle = CYCLE_ACTIVE;
+			mod_timer(&pwr->thermal_timer, jiffies +
+					(TH_HZ - pwr->thermal_timeout));
+			pwr->thermal_highlow = 1;
+		}
+	} else {
+		/* Non-sysfs pwr limit, stop the thermal cycle if active */
+		if (pwr->thermal_cycle == CYCLE_ACTIVE) {
+			pwr->thermal_cycle = CYCLE_ENABLE;
+			del_timer_sync(&pwr->thermal_timer);
+		}
+	}
+}
+
+/**
+ * kgsl_pwrctrl_pwrlevel_change() - Validate and change power levels
+ * @device: Pointer to the kgsl_device struct
+ * @new_level: Requested powerlevel, an index into the pwrlevel array
+ *
+ * Check that any power level constraints are still valid.  Update the
+ * requested level according to any thermal, max/min, or power constraints.
+ * If a new GPU level is going to be set, update the bus to that level's
+ * default value.  Do not change the bus if a constraint keeps the new
+ * level at the current level.  Set the new GPU frequency.
+ */
+void kgsl_pwrctrl_pwrlevel_change(struct kgsl_device *device,
+				unsigned int new_level)
+{
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct kgsl_pwrlevel *pwrlevel;
+	unsigned int old_level = pwr->active_pwrlevel;
+
+	/* If a pwr constraint has expired, remove it */
+	if ((pwr->constraint.type != KGSL_CONSTRAINT_NONE) &&
+		(time_after(jiffies, pwr->constraint.expires))) {
+		/* Trace the constraint being un-set by the driver */
+		trace_kgsl_constraint(device, pwr->constraint.type,
+						old_level, 0);
+		/* Invalidate the constraint set */
+		pwr->constraint.expires = 0;
+		pwr->constraint.type = KGSL_CONSTRAINT_NONE;
+	}
+
+	/*
+	 * Adjust the power level if required by thermal, max/min,
+	 * constraints, etc
+	 */
+	new_level = _adjust_pwrlevel(pwr, new_level, &pwr->constraint,
+					device->pwrscale.popp_level);
+
+	/*
+	 * If thermal cycling is required and the new level hits the
+	 * thermal limit, kick off the cycling.
+	 */
+	kgsl_pwrctrl_set_thermal_cycle(pwr, new_level);
+
+	if (new_level == old_level)
+		return;
+
+	kgsl_pwrscale_update_stats(device);
+
+	/*
+	 * Set the active and previous powerlevel first in case the clocks are
+	 * off - if we don't do this then the pwrlevel change won't take effect
+	 * when the clocks come back
+	 */
+	pwr->active_pwrlevel = new_level;
+	pwr->previous_pwrlevel = old_level;
+
+	/*
+	 * If the bus is running faster than its default level and the GPU
+	 * frequency is moving down keep the DDR at a relatively high level.
+	 */
+	if (pwr->bus_mod < 0 || new_level < old_level) {
+		pwr->bus_mod = 0;
+		pwr->bus_percent_ab = 0;
+	}
+	/*
+	 * Update the bus before the GPU clock to prevent underrun during
+	 * frequency increases.
+	 */
+	kgsl_pwrctrl_buslevel_update(device, true);
+
+	pwrlevel = &pwr->pwrlevels[pwr->active_pwrlevel];
+	/* Change register settings, if any, BEFORE the pwrlevel change */
+	kgsl_pwrctrl_pwrlevel_change_settings(device, 0);
+	clk_set_rate(pwr->grp_clks[0], pwrlevel->gpu_freq);
+	_isense_clk_set_rate(pwr, pwr->active_pwrlevel);
+
+	trace_kgsl_pwrlevel(device,
+			pwr->active_pwrlevel, pwrlevel->gpu_freq,
+			pwr->previous_pwrlevel,
+			pwr->pwrlevels[old_level].gpu_freq);
+
+	/*
+	 * Some targets do not support the bandwidth requirement of the
+	 * GPU at TURBO; for such targets we need to set the GPU-BIMC
+	 * interface clocks to TURBO directly whenever the GPU runs at
+	 * TURBO.  The TURBO frequency of gfx-bimc needs to be defined
+	 * in the target device tree.
+	 */
+	if (pwr->gpu_bimc_int_clk) {
+		if (pwr->active_pwrlevel == 0 &&
+				!pwr->gpu_bimc_interface_enabled) {
+			clk_set_rate(pwr->gpu_bimc_int_clk,
+					pwr->gpu_bimc_int_clk_freq);
+			clk_prepare_enable(pwr->gpu_bimc_int_clk);
+			pwr->gpu_bimc_interface_enabled = 1;
+		} else if (pwr->previous_pwrlevel == 0
+				&& pwr->gpu_bimc_interface_enabled) {
+			clk_disable_unprepare(pwr->gpu_bimc_int_clk);
+			pwr->gpu_bimc_interface_enabled = 0;
+		}
+	}
+
+	/* Change register settings, if any, AFTER the pwrlevel change */
+	kgsl_pwrctrl_pwrlevel_change_settings(device, 1);
+
+	/* Timestamp the frequency change */
+	device->pwrscale.freq_change_time = ktime_to_ms(ktime_get());
+}
+EXPORT_SYMBOL(kgsl_pwrctrl_pwrlevel_change);
+
+/**
+ * kgsl_pwrctrl_set_constraint() - Validate and change enforced constraint
+ * @device: Pointer to the kgsl_device struct
+ * @pwrc: Pointer to requested constraint
+ * @id: Context id which owns the constraint
+ *
+ * Accept the new constraint if no previous constraint existed or if the
+ * new constraint is faster than the previous one.  If the new and previous
+ * constraints are equal, update the timestamp and ownership to make sure
+ * the constraint expires at the correct time.
+ */
+void kgsl_pwrctrl_set_constraint(struct kgsl_device *device,
+			struct kgsl_pwr_constraint *pwrc, uint32_t id)
+{
+	unsigned int constraint;
+	struct kgsl_pwr_constraint *pwrc_old;
+
+	if (device == NULL || pwrc == NULL)
+		return;
+	constraint = _adjust_pwrlevel(&device->pwrctrl,
+				device->pwrctrl.active_pwrlevel, pwrc, 0);
+	pwrc_old = &device->pwrctrl.constraint;
+
+	/*
+	 * If a constraint is already set, set a new constraint only
+	 * if it is faster.  If the requested constraint is the same
+	 * as the current one, update ownership and timestamp.
+	 */
+	if ((pwrc_old->type == KGSL_CONSTRAINT_NONE) ||
+		(constraint < pwrc_old->hint.pwrlevel.level)) {
+		pwrc_old->type = pwrc->type;
+		pwrc_old->sub_type = pwrc->sub_type;
+		pwrc_old->hint.pwrlevel.level = constraint;
+		pwrc_old->owner_id = id;
+		pwrc_old->expires = jiffies + device->pwrctrl.interval_timeout;
+		kgsl_pwrctrl_pwrlevel_change(device, constraint);
+		/* Trace the constraint being set by the driver */
+		trace_kgsl_constraint(device, pwrc_old->type, constraint, 1);
+	} else if ((pwrc_old->type == pwrc->type) &&
+		(pwrc_old->hint.pwrlevel.level == constraint)) {
+		pwrc_old->owner_id = id;
+		pwrc_old->expires = jiffies + device->pwrctrl.interval_timeout;
+	}
+}
+EXPORT_SYMBOL(kgsl_pwrctrl_set_constraint);
+
+/**
+ * kgsl_pwrctrl_update_l2pc() - Update existing qos request
+ * @device: Pointer to the kgsl_device struct
+ *
+ * Updates an existing qos request to avoid L2PC on the
+ * CPUs (selected through the dtsi) on which the GPU
+ * thread is running, which helps performance.
+ */
+void kgsl_pwrctrl_update_l2pc(struct kgsl_device *device)
+{
+	int cpu;
+
+	if (device->pwrctrl.l2pc_cpus_mask == 0)
+		return;
+
+	cpu = get_cpu();
+	put_cpu();
+
+	if ((1 << cpu) & device->pwrctrl.l2pc_cpus_mask) {
+		pm_qos_update_request_timeout(
+				&device->pwrctrl.l2pc_cpus_qos,
+				device->pwrctrl.pm_qos_cpu_mask_latency,
+				KGSL_L2PC_CPU_TIMEOUT);
+	}
+}
+EXPORT_SYMBOL(kgsl_pwrctrl_update_l2pc);
+
+static ssize_t kgsl_pwrctrl_thermal_pwrlevel_store(struct device *dev,
+					 struct device_attribute *attr,
+					 const char *buf, size_t count)
+{
+	struct kgsl_device *device = kgsl_device_from_dev(dev);
+	struct kgsl_pwrctrl *pwr;
+	int ret;
+	unsigned int level = 0;
+
+	if (device == NULL)
+		return 0;
+
+	pwr = &device->pwrctrl;
+
+	ret = kgsl_sysfs_store(buf, &level);
+
+	if (ret)
+		return ret;
+
+	mutex_lock(&device->mutex);
+
+	if (level > pwr->num_pwrlevels - 2)
+		level = pwr->num_pwrlevels - 2;
+
+	pwr->thermal_pwrlevel = level;
+
+	/* Update the current level using the new limit */
+	kgsl_pwrctrl_pwrlevel_change(device, pwr->active_pwrlevel);
+	mutex_unlock(&device->mutex);
+
+	return count;
+}
+
+static ssize_t kgsl_pwrctrl_thermal_pwrlevel_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+
+	struct kgsl_device *device = kgsl_device_from_dev(dev);
+	struct kgsl_pwrctrl *pwr;
+
+	if (device == NULL)
+		return 0;
+	pwr = &device->pwrctrl;
+	return snprintf(buf, PAGE_SIZE, "%d\n", pwr->thermal_pwrlevel);
+}
+
+static ssize_t kgsl_pwrctrl_max_pwrlevel_store(struct device *dev,
+					 struct device_attribute *attr,
+					 const char *buf, size_t count)
+{
+	struct kgsl_device *device = kgsl_device_from_dev(dev);
+	struct kgsl_pwrctrl *pwr;
+	int ret;
+	unsigned int level = 0;
+
+	if (device == NULL)
+		return 0;
+
+	pwr = &device->pwrctrl;
+
+	ret = kgsl_sysfs_store(buf, &level);
+	if (ret)
+		return ret;
+
+	mutex_lock(&device->mutex);
+
+	/* You can't set a maximum power level lower than the minimum */
+	if (level > pwr->min_pwrlevel)
+		level = pwr->min_pwrlevel;
+
+	pwr->max_pwrlevel = level;
+
+	/* Update the current level using the new limit */
+	kgsl_pwrctrl_pwrlevel_change(device, pwr->active_pwrlevel);
+	mutex_unlock(&device->mutex);
+
+	return count;
+}
+
+static ssize_t kgsl_pwrctrl_max_pwrlevel_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+
+	struct kgsl_device *device = kgsl_device_from_dev(dev);
+	struct kgsl_pwrctrl *pwr;
+
+	if (device == NULL)
+		return 0;
+	pwr = &device->pwrctrl;
+	return snprintf(buf, PAGE_SIZE, "%u\n", pwr->max_pwrlevel);
+}
+
+static void kgsl_pwrctrl_min_pwrlevel_set(struct kgsl_device *device,
+					int level)
+{
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+
+	mutex_lock(&device->mutex);
+	if (level > pwr->num_pwrlevels - 2)
+		level = pwr->num_pwrlevels - 2;
+
+	/* You can't set a minimum power level lower than the maximum */
+	if (level < pwr->max_pwrlevel)
+		level = pwr->max_pwrlevel;
+
+	pwr->min_pwrlevel = level;
+
+	/* Update the current level using the new limit */
+	kgsl_pwrctrl_pwrlevel_change(device, pwr->active_pwrlevel);
+
+	mutex_unlock(&device->mutex);
+}
+
+static ssize_t kgsl_pwrctrl_min_pwrlevel_store(struct device *dev,
+					 struct device_attribute *attr,
+					 const char *buf, size_t count)
+{
+	struct kgsl_device *device = kgsl_device_from_dev(dev);
+	int ret;
+	unsigned int level = 0;
+
+	if (device == NULL)
+		return 0;
+
+	ret = kgsl_sysfs_store(buf, &level);
+	if (ret)
+		return ret;
+
+	kgsl_pwrctrl_min_pwrlevel_set(device, level);
+
+	return count;
+}
+
+static ssize_t kgsl_pwrctrl_min_pwrlevel_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct kgsl_device *device = kgsl_device_from_dev(dev);
+	struct kgsl_pwrctrl *pwr;
+
+	if (device == NULL)
+		return 0;
+	pwr = &device->pwrctrl;
+	return snprintf(buf, PAGE_SIZE, "%u\n", pwr->min_pwrlevel);
+}
+
+static ssize_t kgsl_pwrctrl_num_pwrlevels_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+
+	struct kgsl_device *device = kgsl_device_from_dev(dev);
+	struct kgsl_pwrctrl *pwr;
+
+	if (device == NULL)
+		return 0;
+	pwr = &device->pwrctrl;
+	return snprintf(buf, PAGE_SIZE, "%d\n", pwr->num_pwrlevels - 1);
+}
+
+/* Given a GPU clock value, return the lowest matching powerlevel */
+
+static int _get_nearest_pwrlevel(struct kgsl_pwrctrl *pwr, unsigned int clock)
+{
+	int i;
+
+	for (i = pwr->num_pwrlevels - 1; i >= 0; i--) {
+		if (abs(pwr->pwrlevels[i].gpu_freq - clock) < 5000000)
+			return i;
+	}
+
+	return -ERANGE;
+}
+
+static void kgsl_pwrctrl_max_clock_set(struct kgsl_device *device, int val)
+{
+	struct kgsl_pwrctrl *pwr;
+	int level;
+
+	pwr = &device->pwrctrl;
+
+	mutex_lock(&device->mutex);
+	level = _get_nearest_pwrlevel(pwr, val);
+	/* If the requested power level is not supported by hw, try cycling */
+	if (level < 0) {
+		unsigned int hfreq, diff, udiff, i;
+
+		if ((val < pwr->pwrlevels[pwr->num_pwrlevels - 1].gpu_freq) ||
+			(val > pwr->pwrlevels[0].gpu_freq))
+			goto err;
+
+		/* Find the neighboring frequencies */
+		for (i = 0; i < pwr->num_pwrlevels - 1; i++) {
+			if ((pwr->pwrlevels[i].gpu_freq > val) &&
+				(pwr->pwrlevels[i + 1].gpu_freq < val)) {
+				level = i;
+				break;
+			}
+		}
+		if (i == pwr->num_pwrlevels - 1)
+			goto err;
+		hfreq = pwr->pwrlevels[i].gpu_freq;
+		diff =  hfreq - pwr->pwrlevels[i + 1].gpu_freq;
+		udiff = hfreq - val;
+		pwr->thermal_timeout = (udiff * TH_HZ) / diff;
+		pwr->thermal_cycle = CYCLE_ENABLE;
+	} else {
+		pwr->thermal_cycle = CYCLE_DISABLE;
+		del_timer_sync(&pwr->thermal_timer);
+	}
+	mutex_unlock(&device->mutex);
+
+	if (pwr->sysfs_pwr_limit)
+		kgsl_pwr_limits_set_freq(pwr->sysfs_pwr_limit,
+					pwr->pwrlevels[level].gpu_freq);
+	return;
+
+err:
+	mutex_unlock(&device->mutex);
+}
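+
+/*
+ * Worked example with hypothetical frequencies: if the table holds
+ * neighbouring levels of 600 MHz and 500 MHz and a 550 MHz limit is
+ * requested, then hfreq = 600 MHz, diff = 100 MHz and udiff = 50 MHz,
+ * so thermal_timeout = (50 * TH_HZ) / 100 = TH_HZ / 2.  The thermal
+ * timer then spends half of each TH_HZ cycle at the higher level and
+ * half at the lower one, which averages out to the requested 550 MHz.
+ */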
+
+static ssize_t kgsl_pwrctrl_max_gpuclk_store(struct device *dev,
+					 struct device_attribute *attr,
+					 const char *buf, size_t count)
+{
+	struct kgsl_device *device = kgsl_device_from_dev(dev);
+	unsigned int val = 0;
+	int ret;
+
+	if (device == NULL)
+		return 0;
+
+	ret = kgsl_sysfs_store(buf, &val);
+	if (ret)
+		return ret;
+
+	kgsl_pwrctrl_max_clock_set(device, val);
+
+	return count;
+}
+
+static unsigned int kgsl_pwrctrl_max_clock_get(struct kgsl_device *device)
+{
+	struct kgsl_pwrctrl *pwr;
+	unsigned int freq;
+
+	if (device == NULL)
+		return 0;
+	pwr = &device->pwrctrl;
+	freq = pwr->pwrlevels[pwr->thermal_pwrlevel].gpu_freq;
+	/* Calculate the effective frequency if we're cycling */
+	if (pwr->thermal_cycle) {
+		unsigned int hfreq = freq;
+		unsigned int lfreq = pwr->pwrlevels[pwr->
+				thermal_pwrlevel + 1].gpu_freq;
+		freq = pwr->thermal_timeout * (lfreq / TH_HZ) +
+			(TH_HZ - pwr->thermal_timeout) * (hfreq / TH_HZ);
+	}
+
+	return freq;
+}
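+
+/*
+ * Continuing the hypothetical 600/500 MHz example: with thermal_timeout
+ * set to TH_HZ / 2 the expression above evaluates to
+ * (TH_HZ/2) * (500 MHz / TH_HZ) + (TH_HZ/2) * (600 MHz / TH_HZ)
+ * = 250 MHz + 300 MHz = 550 MHz, i.e. the limit originally requested
+ * through max_gpuclk is what gets reported back.
+ */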
+
+static ssize_t kgsl_pwrctrl_max_gpuclk_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct kgsl_device *device = kgsl_device_from_dev(dev);
+
+	return snprintf(buf, PAGE_SIZE, "%d\n",
+		kgsl_pwrctrl_max_clock_get(device));
+}
+
+static ssize_t kgsl_pwrctrl_gpuclk_store(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t count)
+{
+	struct kgsl_device *device = kgsl_device_from_dev(dev);
+	struct kgsl_pwrctrl *pwr;
+	unsigned int val = 0;
+	int ret, level;
+
+	if (device == NULL)
+		return 0;
+
+	pwr = &device->pwrctrl;
+
+	ret = kgsl_sysfs_store(buf, &val);
+	if (ret)
+		return ret;
+
+	mutex_lock(&device->mutex);
+	level = _get_nearest_pwrlevel(pwr, val);
+	if (level >= 0)
+		kgsl_pwrctrl_pwrlevel_change(device, (unsigned int) level);
+
+	mutex_unlock(&device->mutex);
+	return count;
+}
+
+static ssize_t kgsl_pwrctrl_gpuclk_show(struct device *dev,
+				    struct device_attribute *attr,
+				    char *buf)
+{
+	struct kgsl_device *device = kgsl_device_from_dev(dev);
+	struct kgsl_pwrctrl *pwr;
+
+	if (device == NULL)
+		return 0;
+	pwr = &device->pwrctrl;
+	return snprintf(buf, PAGE_SIZE, "%ld\n", kgsl_pwrctrl_active_freq(pwr));
+}
+
+static ssize_t __timer_store(struct device *dev, struct device_attribute *attr,
+					const char *buf, size_t count,
+					enum kgsl_pwrctrl_timer_type timer)
+{
+	unsigned int val = 0;
+	struct kgsl_device *device = kgsl_device_from_dev(dev);
+	int ret;
+
+	if (device == NULL)
+		return 0;
+
+	ret = kgsl_sysfs_store(buf, &val);
+	if (ret)
+		return ret;
+
+	/*
+	 * We don't quite accept a maximum of 0xFFFFFFFF due to internal jiffy
+	 * math, so make sure the value falls within the largest offset we can
+	 * deal with
+	 */
+
+	if (val > jiffies_to_usecs(MAX_JIFFY_OFFSET))
+		return -EINVAL;
+
+	mutex_lock(&device->mutex);
+	/* Let the timeout be requested in ms, but convert to jiffies. */
+	if (timer == KGSL_PWR_IDLE_TIMER)
+		device->pwrctrl.interval_timeout = msecs_to_jiffies(val);
+
+	mutex_unlock(&device->mutex);
+
+	return count;
+}
+
+static ssize_t kgsl_pwrctrl_idle_timer_store(struct device *dev,
+					struct device_attribute *attr,
+					const char *buf, size_t count)
+{
+	return __timer_store(dev, attr, buf, count, KGSL_PWR_IDLE_TIMER);
+}
+
+static ssize_t kgsl_pwrctrl_idle_timer_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct kgsl_device *device = kgsl_device_from_dev(dev);
+
+	if (device == NULL)
+		return 0;
+	/* Show the idle_timeout converted to msec */
+	return snprintf(buf, PAGE_SIZE, "%u\n",
+		jiffies_to_msecs(device->pwrctrl.interval_timeout));
+}
+
+static ssize_t kgsl_pwrctrl_pmqos_active_latency_store(struct device *dev,
+					struct device_attribute *attr,
+					const char *buf, size_t count)
+{
+	unsigned int val = 0;
+	struct kgsl_device *device = kgsl_device_from_dev(dev);
+	int ret;
+
+	if (device == NULL)
+		return 0;
+
+	ret = kgsl_sysfs_store(buf, &val);
+	if (ret)
+		return ret;
+
+	mutex_lock(&device->mutex);
+	device->pwrctrl.pm_qos_active_latency = val;
+	mutex_unlock(&device->mutex);
+
+	return count;
+}
+
+static ssize_t kgsl_pwrctrl_pmqos_active_latency_show(struct device *dev,
+					   struct device_attribute *attr,
+					   char *buf)
+{
+	struct kgsl_device *device = kgsl_device_from_dev(dev);
+
+	if (device == NULL)
+		return 0;
+	return snprintf(buf, PAGE_SIZE, "%d\n",
+		device->pwrctrl.pm_qos_active_latency);
+}
+
+static ssize_t kgsl_pwrctrl_gpubusy_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	int ret;
+	struct kgsl_device *device = kgsl_device_from_dev(dev);
+	struct kgsl_clk_stats *stats;
+
+	if (device == NULL)
+		return 0;
+	stats = &device->pwrctrl.clk_stats;
+	ret = snprintf(buf, PAGE_SIZE, "%7d %7d\n",
+			stats->busy_old, stats->total_old);
+	if (!test_bit(KGSL_PWRFLAGS_AXI_ON, &device->pwrctrl.power_flags)) {
+		stats->busy_old = 0;
+		stats->total_old = 0;
+	}
+	return ret;
+}
+
+static ssize_t kgsl_pwrctrl_gpu_available_frequencies_show(
+					struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct kgsl_device *device = kgsl_device_from_dev(dev);
+	struct kgsl_pwrctrl *pwr;
+	int index, num_chars = 0;
+
+	if (device == NULL)
+		return 0;
+	pwr = &device->pwrctrl;
+	for (index = 0; index < pwr->num_pwrlevels - 1; index++) {
+		num_chars += scnprintf(buf + num_chars,
+			PAGE_SIZE - num_chars - 1,
+			"%d ", pwr->pwrlevels[index].gpu_freq);
+		/* One space for trailing null and another for the newline */
+		if (num_chars >= PAGE_SIZE - 2)
+			break;
+	}
+	buf[num_chars++] = '\n';
+	return num_chars;
+}
+
+static ssize_t kgsl_pwrctrl_gpu_clock_stats_show(
+					struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct kgsl_device *device = kgsl_device_from_dev(dev);
+	struct kgsl_pwrctrl *pwr;
+	int index, num_chars = 0;
+
+	if (device == NULL)
+		return 0;
+	pwr = &device->pwrctrl;
+	mutex_lock(&device->mutex);
+	kgsl_pwrscale_update_stats(device);
+	mutex_unlock(&device->mutex);
+	for (index = 0; index < pwr->num_pwrlevels - 1; index++)
+		num_chars += snprintf(buf + num_chars, PAGE_SIZE - num_chars,
+			"%llu ", pwr->clock_times[index]);
+
+	if (num_chars < PAGE_SIZE)
+		buf[num_chars++] = '\n';
+
+	return num_chars;
+}
+
+static ssize_t kgsl_pwrctrl_reset_count_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct kgsl_device *device = kgsl_device_from_dev(dev);
+
+	if (device == NULL)
+		return 0;
+	return snprintf(buf, PAGE_SIZE, "%d\n", device->reset_counter);
+}
+
+static void __force_on(struct kgsl_device *device, int flag, int on)
+{
+	if (on) {
+		switch (flag) {
+		case KGSL_PWRFLAGS_CLK_ON:
+			kgsl_pwrctrl_clk(device, KGSL_PWRFLAGS_ON,
+				KGSL_STATE_ACTIVE);
+			break;
+		case KGSL_PWRFLAGS_AXI_ON:
+			kgsl_pwrctrl_axi(device, KGSL_PWRFLAGS_ON);
+			break;
+		case KGSL_PWRFLAGS_POWER_ON:
+			kgsl_pwrctrl_pwrrail(device, KGSL_PWRFLAGS_ON);
+			break;
+		}
+		set_bit(flag, &device->pwrctrl.ctrl_flags);
+	} else {
+		clear_bit(flag, &device->pwrctrl.ctrl_flags);
+	}
+}
+
+static ssize_t __force_on_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf, int flag)
+{
+	struct kgsl_device *device = kgsl_device_from_dev(dev);
+
+	if (device == NULL)
+		return 0;
+	return snprintf(buf, PAGE_SIZE, "%d\n",
+		test_bit(flag, &device->pwrctrl.ctrl_flags));
+}
+
+static ssize_t __force_on_store(struct device *dev,
+					struct device_attribute *attr,
+					const char *buf, size_t count,
+					int flag)
+{
+	unsigned int val = 0;
+	struct kgsl_device *device = kgsl_device_from_dev(dev);
+	int ret;
+
+	if (device == NULL)
+		return 0;
+
+	ret = kgsl_sysfs_store(buf, &val);
+	if (ret)
+		return ret;
+
+	mutex_lock(&device->mutex);
+	__force_on(device, flag, val);
+	mutex_unlock(&device->mutex);
+
+	return count;
+}
+
+static ssize_t kgsl_pwrctrl_force_clk_on_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	return __force_on_show(dev, attr, buf, KGSL_PWRFLAGS_CLK_ON);
+}
+
+static ssize_t kgsl_pwrctrl_force_clk_on_store(struct device *dev,
+					struct device_attribute *attr,
+					const char *buf, size_t count)
+{
+	return __force_on_store(dev, attr, buf, count, KGSL_PWRFLAGS_CLK_ON);
+}
+
+static ssize_t kgsl_pwrctrl_force_bus_on_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	return __force_on_show(dev, attr, buf, KGSL_PWRFLAGS_AXI_ON);
+}
+
+static ssize_t kgsl_pwrctrl_force_bus_on_store(struct device *dev,
+					struct device_attribute *attr,
+					const char *buf, size_t count)
+{
+	return __force_on_store(dev, attr, buf, count, KGSL_PWRFLAGS_AXI_ON);
+}
+
+static ssize_t kgsl_pwrctrl_force_rail_on_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	return __force_on_show(dev, attr, buf, KGSL_PWRFLAGS_POWER_ON);
+}
+
+static ssize_t kgsl_pwrctrl_force_rail_on_store(struct device *dev,
+					struct device_attribute *attr,
+					const char *buf, size_t count)
+{
+	return __force_on_store(dev, attr, buf, count, KGSL_PWRFLAGS_POWER_ON);
+}
+
+static ssize_t kgsl_pwrctrl_force_no_nap_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	return __force_on_show(dev, attr, buf, KGSL_PWRFLAGS_NAP_OFF);
+}
+
+static ssize_t kgsl_pwrctrl_force_no_nap_store(struct device *dev,
+					struct device_attribute *attr,
+					const char *buf, size_t count)
+{
+	return __force_on_store(dev, attr, buf, count,
+					KGSL_PWRFLAGS_NAP_OFF);
+}
+
+static ssize_t kgsl_pwrctrl_bus_split_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct kgsl_device *device = kgsl_device_from_dev(dev);
+
+	if (device == NULL)
+		return 0;
+	return snprintf(buf, PAGE_SIZE, "%d\n",
+		device->pwrctrl.bus_control);
+}
+
+static ssize_t kgsl_pwrctrl_bus_split_store(struct device *dev,
+					struct device_attribute *attr,
+					const char *buf, size_t count)
+{
+	unsigned int val = 0;
+	struct kgsl_device *device = kgsl_device_from_dev(dev);
+	int ret;
+
+	if (device == NULL)
+		return 0;
+
+	ret = kgsl_sysfs_store(buf, &val);
+	if (ret)
+		return ret;
+
+	mutex_lock(&device->mutex);
+	device->pwrctrl.bus_control = val ? true : false;
+	mutex_unlock(&device->mutex);
+
+	return count;
+}
+
+static ssize_t kgsl_pwrctrl_default_pwrlevel_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct kgsl_device *device = kgsl_device_from_dev(dev);
+
+	if (device == NULL)
+		return 0;
+	return snprintf(buf, PAGE_SIZE, "%d\n",
+		device->pwrctrl.default_pwrlevel);
+}
+
+static ssize_t kgsl_pwrctrl_default_pwrlevel_store(struct device *dev,
+					struct device_attribute *attr,
+					const char *buf, size_t count)
+{
+	struct kgsl_device *device = kgsl_device_from_dev(dev);
+	struct kgsl_pwrctrl *pwr;
+	struct kgsl_pwrscale *pwrscale;
+	int ret;
+	unsigned int level = 0;
+
+	if (device == NULL)
+		return 0;
+
+	pwr = &device->pwrctrl;
+	pwrscale = &device->pwrscale;
+
+	ret = kgsl_sysfs_store(buf, &level);
+	if (ret)
+		return ret;
+
+	if (level > pwr->num_pwrlevels - 2)
+		goto done;
+
+	mutex_lock(&device->mutex);
+	pwr->default_pwrlevel = level;
+	pwrscale->gpu_profile.profile.initial_freq
+			= pwr->pwrlevels[level].gpu_freq;
+
+	mutex_unlock(&device->mutex);
+done:
+	return count;
+}
+
+static ssize_t kgsl_popp_store(struct device *dev,
+					struct device_attribute *attr,
+					const char *buf, size_t count)
+{
+	unsigned int val = 0;
+	struct kgsl_device *device = kgsl_device_from_dev(dev);
+	int ret;
+
+	if (device == NULL)
+		return 0;
+
+	ret = kgsl_sysfs_store(buf, &val);
+	if (ret)
+		return ret;
+
+	mutex_lock(&device->mutex);
+	if (val)
+		set_bit(POPP_ON, &device->pwrscale.popp_state);
+	else
+		clear_bit(POPP_ON, &device->pwrscale.popp_state);
+	mutex_unlock(&device->mutex);
+
+	return count;
+}
+
+static ssize_t kgsl_popp_show(struct device *dev,
+					   struct device_attribute *attr,
+					   char *buf)
+{
+	struct kgsl_device *device = kgsl_device_from_dev(dev);
+
+	if (device == NULL)
+		return 0;
+	return snprintf(buf, PAGE_SIZE, "%d\n",
+		test_bit(POPP_ON, &device->pwrscale.popp_state));
+}
+
+static ssize_t kgsl_pwrctrl_gpu_model_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct kgsl_device *device = kgsl_device_from_dev(dev);
+	char model_str[32] = {0};
+
+	if (device == NULL)
+		return 0;
+
+	device->ftbl->gpu_model(device, model_str, sizeof(model_str));
+
+	return snprintf(buf, PAGE_SIZE, "%s\n", model_str);
+}
+
+static ssize_t kgsl_pwrctrl_gpu_busy_percentage_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	int ret;
+	struct kgsl_device *device = kgsl_device_from_dev(dev);
+	struct kgsl_clk_stats *stats;
+	unsigned int busy_percent = 0;
+
+	if (device == NULL)
+		return 0;
+	stats = &device->pwrctrl.clk_stats;
+
+	if (stats->total_old != 0)
+		busy_percent = (stats->busy_old * 100) / stats->total_old;
+
+	ret = snprintf(buf, PAGE_SIZE, "%d %%\n", busy_percent);
+
+	/* Reset the stats if GPU is OFF */
+	if (!test_bit(KGSL_PWRFLAGS_AXI_ON, &device->pwrctrl.power_flags)) {
+		stats->busy_old = 0;
+		stats->total_old = 0;
+	}
+	return ret;
+}
+
+static ssize_t kgsl_pwrctrl_min_clock_mhz_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct kgsl_device *device = kgsl_device_from_dev(dev);
+	struct kgsl_pwrctrl *pwr;
+
+	if (device == NULL)
+		return 0;
+	pwr = &device->pwrctrl;
+
+	return snprintf(buf, PAGE_SIZE, "%d\n",
+			pwr->pwrlevels[pwr->min_pwrlevel].gpu_freq / 1000000);
+}
+
+static ssize_t kgsl_pwrctrl_min_clock_mhz_store(struct device *dev,
+					struct device_attribute *attr,
+					const char *buf, size_t count)
+{
+	struct kgsl_device *device = kgsl_device_from_dev(dev);
+	int level, ret;
+	unsigned int freq;
+	struct kgsl_pwrctrl *pwr;
+
+	if (device == NULL)
+		return 0;
+
+	pwr = &device->pwrctrl;
+
+	ret = kgsl_sysfs_store(buf, &freq);
+	if (ret)
+		return ret;
+
+	freq *= 1000000;
+	level = _get_nearest_pwrlevel(pwr, freq);
+
+	if (level >= 0)
+		kgsl_pwrctrl_min_pwrlevel_set(device, level);
+
+	return count;
+}
+
+static ssize_t kgsl_pwrctrl_max_clock_mhz_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct kgsl_device *device = kgsl_device_from_dev(dev);
+	unsigned int freq;
+
+	if (device == NULL)
+		return 0;
+
+	freq = kgsl_pwrctrl_max_clock_get(device);
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", freq / 1000000);
+}
+
+static ssize_t kgsl_pwrctrl_max_clock_mhz_store(struct device *dev,
+					 struct device_attribute *attr,
+					 const char *buf, size_t count)
+{
+	struct kgsl_device *device = kgsl_device_from_dev(dev);
+	unsigned int val = 0;
+	int ret;
+
+	if (device == NULL)
+		return 0;
+
+	ret = kgsl_sysfs_store(buf, &val);
+	if (ret)
+		return ret;
+
+	val *= 1000000;
+	kgsl_pwrctrl_max_clock_set(device, val);
+
+	return count;
+}
+
+static ssize_t kgsl_pwrctrl_clock_mhz_show(struct device *dev,
+				    struct device_attribute *attr,
+				    char *buf)
+{
+	struct kgsl_device *device = kgsl_device_from_dev(dev);
+
+	if (device == NULL)
+		return 0;
+
+	return snprintf(buf, PAGE_SIZE, "%ld\n",
+			kgsl_pwrctrl_active_freq(&device->pwrctrl) / 1000000);
+}
+
+static ssize_t kgsl_pwrctrl_freq_table_mhz_show(
+					struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct kgsl_device *device = kgsl_device_from_dev(dev);
+	struct kgsl_pwrctrl *pwr;
+	int index, num_chars = 0;
+
+	if (device == NULL)
+		return 0;
+
+	pwr = &device->pwrctrl;
+	for (index = 0; index < pwr->num_pwrlevels - 1; index++) {
+		num_chars += scnprintf(buf + num_chars,
+			PAGE_SIZE - num_chars - 1,
+			"%d ", pwr->pwrlevels[index].gpu_freq / 1000000);
+		/* One space for trailing null and another for the newline */
+		if (num_chars >= PAGE_SIZE - 2)
+			break;
+	}
+
+	buf[num_chars++] = '\n';
+
+	return num_chars;
+}
+
+static ssize_t kgsl_pwrctrl_temp_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	struct kgsl_device *device = kgsl_device_from_dev(dev);
+	struct kgsl_pwrctrl *pwr;
+	struct thermal_zone_device *thermal_dev;
+	int ret, temperature = 0;
+
+	if (device == NULL)
+		goto done;
+
+	pwr = &device->pwrctrl;
+
+	if (!pwr->tzone_name)
+		goto done;
+
+	thermal_dev = thermal_zone_get_zone_by_name((char *)pwr->tzone_name);
+	if (thermal_dev == NULL)
+		goto done;
+
+	ret = thermal_zone_get_temp(thermal_dev, &temperature);
+	if (ret)
+		goto done;
+
+	return snprintf(buf, PAGE_SIZE, "%d\n",
+			temperature);
+done:
+	return 0;
+}
+
+static ssize_t kgsl_pwrctrl_pwrscale_store(struct device *dev,
+					   struct device_attribute *attr,
+					   const char *buf, size_t count)
+{
+	struct kgsl_device *device = kgsl_device_from_dev(dev);
+	int ret;
+	unsigned int enable = 0;
+
+	if (device == NULL)
+		return 0;
+
+	ret = kgsl_sysfs_store(buf, &enable);
+	if (ret)
+		return ret;
+
+	mutex_lock(&device->mutex);
+
+	if (enable)
+		kgsl_pwrscale_enable(device);
+	else
+		kgsl_pwrscale_disable(device, false);
+
+	mutex_unlock(&device->mutex);
+
+	return count;
+}
+
+static ssize_t kgsl_pwrctrl_pwrscale_show(struct device *dev,
+					  struct device_attribute *attr,
+					  char *buf)
+{
+	struct kgsl_device *device = kgsl_device_from_dev(dev);
+	struct kgsl_pwrscale *psc;
+
+	if (device == NULL)
+		return 0;
+	psc = &device->pwrscale;
+
+	return snprintf(buf, PAGE_SIZE, "%u\n", psc->enabled);
+}
+
+static DEVICE_ATTR(temp, 0444, kgsl_pwrctrl_temp_show, NULL);
+static DEVICE_ATTR(gpuclk, 0644, kgsl_pwrctrl_gpuclk_show,
+	kgsl_pwrctrl_gpuclk_store);
+static DEVICE_ATTR(max_gpuclk, 0644, kgsl_pwrctrl_max_gpuclk_show,
+	kgsl_pwrctrl_max_gpuclk_store);
+static DEVICE_ATTR(idle_timer, 0644, kgsl_pwrctrl_idle_timer_show,
+	kgsl_pwrctrl_idle_timer_store);
+static DEVICE_ATTR(gpubusy, 0444, kgsl_pwrctrl_gpubusy_show,
+	NULL);
+static DEVICE_ATTR(gpu_available_frequencies, 0444,
+	kgsl_pwrctrl_gpu_available_frequencies_show,
+	NULL);
+static DEVICE_ATTR(gpu_clock_stats, 0444,
+	kgsl_pwrctrl_gpu_clock_stats_show,
+	NULL);
+static DEVICE_ATTR(max_pwrlevel, 0644,
+	kgsl_pwrctrl_max_pwrlevel_show,
+	kgsl_pwrctrl_max_pwrlevel_store);
+static DEVICE_ATTR(min_pwrlevel, 0644,
+	kgsl_pwrctrl_min_pwrlevel_show,
+	kgsl_pwrctrl_min_pwrlevel_store);
+static DEVICE_ATTR(thermal_pwrlevel, 0644,
+	kgsl_pwrctrl_thermal_pwrlevel_show,
+	kgsl_pwrctrl_thermal_pwrlevel_store);
+static DEVICE_ATTR(num_pwrlevels, 0444,
+	kgsl_pwrctrl_num_pwrlevels_show,
+	NULL);
+static DEVICE_ATTR(pmqos_active_latency, 0644,
+	kgsl_pwrctrl_pmqos_active_latency_show,
+	kgsl_pwrctrl_pmqos_active_latency_store);
+static DEVICE_ATTR(reset_count, 0444,
+	kgsl_pwrctrl_reset_count_show,
+	NULL);
+static DEVICE_ATTR(force_clk_on, 0644,
+	kgsl_pwrctrl_force_clk_on_show,
+	kgsl_pwrctrl_force_clk_on_store);
+static DEVICE_ATTR(force_bus_on, 0644,
+	kgsl_pwrctrl_force_bus_on_show,
+	kgsl_pwrctrl_force_bus_on_store);
+static DEVICE_ATTR(force_rail_on, 0644,
+	kgsl_pwrctrl_force_rail_on_show,
+	kgsl_pwrctrl_force_rail_on_store);
+static DEVICE_ATTR(bus_split, 0644,
+	kgsl_pwrctrl_bus_split_show,
+	kgsl_pwrctrl_bus_split_store);
+static DEVICE_ATTR(default_pwrlevel, 0644,
+	kgsl_pwrctrl_default_pwrlevel_show,
+	kgsl_pwrctrl_default_pwrlevel_store);
+static DEVICE_ATTR(popp, 0644, kgsl_popp_show, kgsl_popp_store);
+static DEVICE_ATTR(force_no_nap, 0644,
+	kgsl_pwrctrl_force_no_nap_show,
+	kgsl_pwrctrl_force_no_nap_store);
+static DEVICE_ATTR(gpu_model, 0444, kgsl_pwrctrl_gpu_model_show, NULL);
+static DEVICE_ATTR(gpu_busy_percentage, 0444,
+	kgsl_pwrctrl_gpu_busy_percentage_show, NULL);
+static DEVICE_ATTR(min_clock_mhz, 0644, kgsl_pwrctrl_min_clock_mhz_show,
+	kgsl_pwrctrl_min_clock_mhz_store);
+static DEVICE_ATTR(max_clock_mhz, 0644, kgsl_pwrctrl_max_clock_mhz_show,
+	kgsl_pwrctrl_max_clock_mhz_store);
+static DEVICE_ATTR(clock_mhz, 0444, kgsl_pwrctrl_clock_mhz_show, NULL);
+static DEVICE_ATTR(freq_table_mhz, 0444,
+	kgsl_pwrctrl_freq_table_mhz_show, NULL);
+static DEVICE_ATTR(pwrscale, 0644,
+	kgsl_pwrctrl_pwrscale_show,
+	kgsl_pwrctrl_pwrscale_store);
+
+static const struct device_attribute *pwrctrl_attr_list[] = {
+	&dev_attr_gpuclk,
+	&dev_attr_max_gpuclk,
+	&dev_attr_idle_timer,
+	&dev_attr_gpubusy,
+	&dev_attr_gpu_available_frequencies,
+	&dev_attr_gpu_clock_stats,
+	&dev_attr_max_pwrlevel,
+	&dev_attr_min_pwrlevel,
+	&dev_attr_thermal_pwrlevel,
+	&dev_attr_num_pwrlevels,
+	&dev_attr_pmqos_active_latency,
+	&dev_attr_reset_count,
+	&dev_attr_force_clk_on,
+	&dev_attr_force_bus_on,
+	&dev_attr_force_rail_on,
+	&dev_attr_force_no_nap,
+	&dev_attr_bus_split,
+	&dev_attr_default_pwrlevel,
+	&dev_attr_popp,
+	&dev_attr_gpu_model,
+	&dev_attr_gpu_busy_percentage,
+	&dev_attr_min_clock_mhz,
+	&dev_attr_max_clock_mhz,
+	&dev_attr_clock_mhz,
+	&dev_attr_freq_table_mhz,
+	&dev_attr_temp,
+	&dev_attr_pwrscale,
+	NULL
+};
+
+struct sysfs_link {
+	const char *src;
+	const char *dst;
+};
+
+static struct sysfs_link link_names[] = {
+	{ "gpu_model", "gpu_model",},
+	{ "gpu_busy_percentage", "gpu_busy",},
+	{ "min_clock_mhz", "gpu_min_clock",},
+	{ "max_clock_mhz", "gpu_max_clock",},
+	{ "clock_mhz", "gpu_clock",},
+	{ "freq_table_mhz", "gpu_freq_table",},
+	{ "temp", "gpu_tmu",},
+};
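+
+/*
+ * For illustration, assuming kgsl_gpu_sysfs_add_link() (defined elsewhere)
+ * exposes each .src attribute under the "gpu" kobject using the .dst name:
+ * "gpu_busy_percentage" would then also be reachable as
+ * /sys/kernel/gpu/gpu_busy and "max_clock_mhz" as
+ * /sys/kernel/gpu/gpu_max_clock.
+ */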
+
+int kgsl_pwrctrl_init_sysfs(struct kgsl_device *device)
+{
+	int i, ret;
+
+	ret = kgsl_create_device_sysfs_files(device->dev, pwrctrl_attr_list);
+	if (ret)
+		return ret;
+
+	device->gpu_sysfs_kobj = kobject_create_and_add("gpu", kernel_kobj);
+	if (IS_ERR_OR_NULL(device->gpu_sysfs_kobj))
+		return (device->gpu_sysfs_kobj == NULL) ?
+		-ENOMEM : PTR_ERR(device->gpu_sysfs_kobj);
+
+	for (i = 0; i < ARRAY_SIZE(link_names); i++)
+		kgsl_gpu_sysfs_add_link(device->gpu_sysfs_kobj,
+			&device->dev->kobj, link_names[i].src,
+			link_names[i].dst);
+
+	return 0;
+}
+
+void kgsl_pwrctrl_uninit_sysfs(struct kgsl_device *device)
+{
+	kgsl_remove_device_sysfs_files(device->dev, pwrctrl_attr_list);
+}
+
+/*
+ * Track the amount of time the gpu is on vs the total system time.
+ * Regularly update the percentage of busy time displayed by sysfs.
+ */
+void kgsl_pwrctrl_busy_time(struct kgsl_device *device, u64 time, u64 busy)
+{
+	struct kgsl_clk_stats *stats = &device->pwrctrl.clk_stats;
+
+	stats->total += time;
+	stats->busy += busy;
+
+	if (stats->total < UPDATE_BUSY_VAL)
+		return;
+
+	/* Update the output regularly and reset the counters. */
+	stats->total_old = stats->total;
+	stats->busy_old = stats->busy;
+	stats->total = 0;
+	stats->busy = 0;
+
+	trace_kgsl_gpubusy(device, stats->busy_old, stats->total_old);
+}
+EXPORT_SYMBOL(kgsl_pwrctrl_busy_time);
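+
+/*
+ * Worked example with made-up counts: if busy reaches 700000 as total
+ * crosses UPDATE_BUSY_VAL (1000000), the snapshot gives busy_old = 700000
+ * and total_old of about 1000000, which gpu_busy_percentage reports as
+ * roughly (700000 * 100) / 1000000 = 70 %.
+ */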
+
+static void kgsl_pwrctrl_clk(struct kgsl_device *device, int state,
+					  int requested_state)
+{
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	int i = 0;
+
+	if (test_bit(KGSL_PWRFLAGS_CLK_ON, &pwr->ctrl_flags))
+		return;
+
+	if (state == KGSL_PWRFLAGS_OFF) {
+		if (test_and_clear_bit(KGSL_PWRFLAGS_CLK_ON,
+			&pwr->power_flags)) {
+			trace_kgsl_clk(device, state,
+					kgsl_pwrctrl_active_freq(pwr));
+			/* Disable gpu-bimc-interface clocks */
+			if (pwr->gpu_bimc_int_clk &&
+					pwr->gpu_bimc_interface_enabled) {
+				clk_disable_unprepare(pwr->gpu_bimc_int_clk);
+				pwr->gpu_bimc_interface_enabled = 0;
+			}
+
+			for (i = KGSL_MAX_CLKS - 1; i > 0; i--)
+				clk_disable(pwr->grp_clks[i]);
+			/* High latency clock maintenance. */
+			if ((pwr->pwrlevels[0].gpu_freq > 0) &&
+				(requested_state != KGSL_STATE_NAP)) {
+				for (i = KGSL_MAX_CLKS - 1; i > 0; i--)
+					clk_unprepare(pwr->grp_clks[i]);
+				clk_set_rate(pwr->grp_clks[0],
+					pwr->pwrlevels[pwr->num_pwrlevels - 1].
+					gpu_freq);
+				_isense_clk_set_rate(pwr,
+					pwr->num_pwrlevels - 1);
+			}
+
+			/* Turn off the IOMMU clocks */
+			kgsl_mmu_disable_clk(&device->mmu);
+		} else if (requested_state == KGSL_STATE_SLUMBER) {
+			/* High latency clock maintenance. */
+			for (i = KGSL_MAX_CLKS - 1; i > 0; i--)
+				clk_unprepare(pwr->grp_clks[i]);
+			if ((pwr->pwrlevels[0].gpu_freq > 0)) {
+				clk_set_rate(pwr->grp_clks[0],
+					pwr->pwrlevels[pwr->num_pwrlevels - 1].
+					gpu_freq);
+				_isense_clk_set_rate(pwr,
+					pwr->num_pwrlevels - 1);
+			}
+		}
+	} else if (state == KGSL_PWRFLAGS_ON) {
+		if (!test_and_set_bit(KGSL_PWRFLAGS_CLK_ON,
+			&pwr->power_flags)) {
+			trace_kgsl_clk(device, state,
+					kgsl_pwrctrl_active_freq(pwr));
+			/* High latency clock maintenance. */
+			if (device->state != KGSL_STATE_NAP) {
+				if (pwr->pwrlevels[0].gpu_freq > 0) {
+					clk_set_rate(pwr->grp_clks[0],
+						pwr->pwrlevels
+						[pwr->active_pwrlevel].
+						gpu_freq);
+					_isense_clk_set_rate(pwr,
+						pwr->active_pwrlevel);
+				}
+
+				for (i = KGSL_MAX_CLKS - 1; i > 0; i--)
+					clk_prepare(pwr->grp_clks[i]);
+			}
+			/*
+			 * As a last step, enable the group clocks so that
+			 * GPU interrupts can come through.
+			 */
+			for (i = KGSL_MAX_CLKS - 1; i > 0; i--)
+				clk_enable(pwr->grp_clks[i]);
+			/* Enable the gpu-bimc-interface clocks */
+			if (pwr->gpu_bimc_int_clk) {
+				if (pwr->active_pwrlevel == 0 &&
+					!pwr->gpu_bimc_interface_enabled) {
+					clk_set_rate(pwr->gpu_bimc_int_clk,
+						pwr->gpu_bimc_int_clk_freq);
+					clk_prepare_enable(
+						pwr->gpu_bimc_int_clk);
+					pwr->gpu_bimc_interface_enabled = 1;
+				}
+			}
+
+			/* Turn on the IOMMU clocks */
+			kgsl_mmu_enable_clk(&device->mmu);
+		}
+
+	}
+}
+
+#ifdef CONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON
+static void kgsl_pwrctrl_suspend_devbw(struct kgsl_pwrctrl *pwr)
+{
+	if (pwr->devbw)
+		devfreq_suspend_devbw(pwr->devbw);
+}
+
+static void kgsl_pwrctrl_resume_devbw(struct kgsl_pwrctrl *pwr)
+{
+	if (pwr->devbw)
+		devfreq_resume_devbw(pwr->devbw);
+}
+#else
+static void kgsl_pwrctrl_suspend_devbw(struct kgsl_pwrctrl *pwr)
+{
+}
+
+static void kgsl_pwrctrl_resume_devbw(struct kgsl_pwrctrl *pwr)
+{
+}
+#endif
+
+static void kgsl_pwrctrl_axi(struct kgsl_device *device, int state)
+{
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+
+	if (test_bit(KGSL_PWRFLAGS_AXI_ON, &pwr->ctrl_flags))
+		return;
+
+	if (state == KGSL_PWRFLAGS_OFF) {
+		if (test_and_clear_bit(KGSL_PWRFLAGS_AXI_ON,
+			&pwr->power_flags)) {
+			trace_kgsl_bus(device, state);
+			kgsl_pwrctrl_buslevel_update(device, false);
+
+			kgsl_pwrctrl_suspend_devbw(pwr);
+		}
+	} else if (state == KGSL_PWRFLAGS_ON) {
+		if (!test_and_set_bit(KGSL_PWRFLAGS_AXI_ON,
+			&pwr->power_flags)) {
+			trace_kgsl_bus(device, state);
+			kgsl_pwrctrl_buslevel_update(device, true);
+
+			kgsl_pwrctrl_resume_devbw(pwr);
+		}
+	}
+}
+
+static int _regulator_enable(struct kgsl_device *device,
+		struct kgsl_regulator *regulator)
+{
+	int ret;
+
+	if (IS_ERR_OR_NULL(regulator->reg))
+		return 0;
+
+	ret = regulator_enable(regulator->reg);
+	if (ret)
+		KGSL_DRV_ERR(device, "Failed to enable regulator '%s': %d\n",
+			regulator->name, ret);
+	return ret;
+}
+
+static void _regulator_disable(struct kgsl_regulator *regulator)
+{
+	if (!IS_ERR_OR_NULL(regulator->reg))
+		regulator_disable(regulator->reg);
+}
+
+static int _enable_regulators(struct kgsl_device *device,
+		struct kgsl_pwrctrl *pwr)
+{
+	int i;
+
+	for (i = 0; i < KGSL_MAX_REGULATORS; i++) {
+		int ret = _regulator_enable(device, &pwr->regulators[i]);
+
+		if (ret) {
+			for (i = i - 1; i >= 0; i--)
+				_regulator_disable(&pwr->regulators[i]);
+			return ret;
+		}
+	}
+
+	return 0;
+}
+
+static int kgsl_pwrctrl_pwrrail(struct kgsl_device *device, int state)
+{
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	int status = 0;
+
+	if (test_bit(KGSL_PWRFLAGS_POWER_ON, &pwr->ctrl_flags))
+		return 0;
+
+	if (state == KGSL_PWRFLAGS_OFF) {
+		if (test_and_clear_bit(KGSL_PWRFLAGS_POWER_ON,
+			&pwr->power_flags)) {
+			trace_kgsl_rail(device, state);
+			device->ftbl->regulator_disable_poll(device);
+		}
+	} else if (state == KGSL_PWRFLAGS_ON) {
+		if (!test_and_set_bit(KGSL_PWRFLAGS_POWER_ON,
+			&pwr->power_flags)) {
+			status = _enable_regulators(device, pwr);
+
+			if (status)
+				clear_bit(KGSL_PWRFLAGS_POWER_ON,
+					&pwr->power_flags);
+			else
+				trace_kgsl_rail(device, state);
+		}
+	}
+
+	return status;
+}
+
+static void kgsl_pwrctrl_irq(struct kgsl_device *device, int state)
+{
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+
+	if (state == KGSL_PWRFLAGS_ON) {
+		if (!test_and_set_bit(KGSL_PWRFLAGS_IRQ_ON,
+			&pwr->power_flags)) {
+			trace_kgsl_irq(device, state);
+			enable_irq(pwr->interrupt_num);
+		}
+	} else if (state == KGSL_PWRFLAGS_OFF) {
+		if (test_and_clear_bit(KGSL_PWRFLAGS_IRQ_ON,
+			&pwr->power_flags)) {
+			trace_kgsl_irq(device, state);
+			if (in_interrupt())
+				disable_irq_nosync(pwr->interrupt_num);
+			else
+				disable_irq(pwr->interrupt_num);
+		}
+	}
+}
+
+/**
+ * kgsl_thermal_cycle() - Work function for thermal timer.
+ * @work: The input work
+ *
+ * This function is called for work that is queued by the thermal
+ * timer.  It cycles to the alternate thermal frequency.
+ */
+static void kgsl_thermal_cycle(struct work_struct *work)
+{
+	struct kgsl_pwrctrl *pwr = container_of(work, struct kgsl_pwrctrl,
+						thermal_cycle_ws);
+	struct kgsl_device *device = container_of(pwr, struct kgsl_device,
+							pwrctrl);
+
+	if (device == NULL)
+		return;
+
+	mutex_lock(&device->mutex);
+	if (pwr->thermal_cycle == CYCLE_ACTIVE) {
+		if (pwr->thermal_highlow)
+			kgsl_pwrctrl_pwrlevel_change(device,
+					pwr->thermal_pwrlevel);
+		else
+			kgsl_pwrctrl_pwrlevel_change(device,
+					pwr->thermal_pwrlevel + 1);
+	}
+	mutex_unlock(&device->mutex);
+}
+
+static void kgsl_thermal_timer(unsigned long data)
+{
+	struct kgsl_device *device = (struct kgsl_device *) data;
+
+	/* Keep the timer running consistently despite processing time */
+	if (device->pwrctrl.thermal_highlow) {
+		mod_timer(&device->pwrctrl.thermal_timer,
+					jiffies +
+					device->pwrctrl.thermal_timeout);
+		device->pwrctrl.thermal_highlow = 0;
+	} else {
+		mod_timer(&device->pwrctrl.thermal_timer,
+					jiffies + (TH_HZ -
+					device->pwrctrl.thermal_timeout));
+		device->pwrctrl.thermal_highlow = 1;
+	}
+	/* Have work run in a non-interrupt context. */
+	kgsl_schedule_work(&device->pwrctrl.thermal_cycle_ws);
+}
+
+#ifdef CONFIG_DEVFREQ_GOV_QCOM_GPUBW_MON
+static int kgsl_pwrctrl_vbif_init(void)
+{
+	devfreq_vbif_register_callback(kgsl_get_bw);
+	return 0;
+}
+#else
+static int kgsl_pwrctrl_vbif_init(void)
+{
+	return 0;
+}
+#endif
+
+static int _get_regulator(struct kgsl_device *device,
+		struct kgsl_regulator *regulator, const char *str)
+{
+	regulator->reg = devm_regulator_get(&device->pdev->dev, str);
+	if (IS_ERR(regulator->reg)) {
+		KGSL_CORE_ERR("Couldn't get regulator: %s (%ld)\n",
+			str, PTR_ERR(regulator->reg));
+		return PTR_ERR(regulator->reg);
+	}
+
+	strlcpy(regulator->name, str, sizeof(regulator->name));
+	return 0;
+}
+
+static int get_legacy_regulators(struct kgsl_device *device)
+{
+	struct device *dev = &device->pdev->dev;
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	int ret;
+
+	ret = _get_regulator(device, &pwr->regulators[0], "vdd");
+
+	/* Use vddcx only on targets that have it. */
+	if (ret == 0 && of_find_property(dev->of_node, "vddcx-supply", NULL))
+		ret = _get_regulator(device, &pwr->regulators[1], "vddcx");
+
+	return ret;
+}
+
+static int get_regulators(struct kgsl_device *device)
+{
+	struct device *dev = &device->pdev->dev;
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	int index = 0;
+	const char *name;
+	struct property *prop;
+
+	if (!of_find_property(dev->of_node, "regulator-names", NULL))
+		return get_legacy_regulators(device);
+
+	of_property_for_each_string(dev->of_node,
+		"regulator-names", prop, name) {
+		int ret;
+
+		if (index == KGSL_MAX_REGULATORS) {
+			KGSL_CORE_ERR("Too many regulators defined\n");
+			return -ENOMEM;
+		}
+
+		ret = _get_regulator(device, &pwr->regulators[index], name);
+		if (ret)
+			return ret;
+		index++;
+	}
+
+	return 0;
+}
+
+static int _get_clocks(struct kgsl_device *device)
+{
+	struct device *dev = &device->pdev->dev;
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	const char *name;
+	struct property *prop;
+
+	pwr->isense_clk_indx = 0;
+	of_property_for_each_string(dev->of_node, "clock-names", prop, name) {
+		int i;
+
+		for (i = 0; i < KGSL_MAX_CLKS; i++) {
+			if (pwr->grp_clks[i] || strcmp(clocks[i], name))
+				continue;
+
+			pwr->grp_clks[i] = devm_clk_get(dev, name);
+
+			if (IS_ERR(pwr->grp_clks[i])) {
+				int ret = PTR_ERR(pwr->grp_clks[i]);
+
+				KGSL_CORE_ERR("Couldn't get clock: %s (%d)\n",
+					name, ret);
+				pwr->grp_clks[i] = NULL;
+				return ret;
+			}
+
+			if (!strcmp(name, "isense_clk"))
+				pwr->isense_clk_indx = i;
+
+			if (device->ftbl->clk_set_options)
+				device->ftbl->clk_set_options(device, name,
+					pwr->grp_clks[i]);
+			break;
+		}
+	}
+
+	if (pwr->isense_clk_indx && of_property_read_u32(dev->of_node,
+		"qcom,isense-clk-on-level", &pwr->isense_clk_on_level)) {
+		KGSL_CORE_ERR("Couldn't get isense clock on level\n");
+		return -ENXIO;
+	}
+	return 0;
+}
+
+static int _isense_clk_set_rate(struct kgsl_pwrctrl *pwr, int level)
+{
+	int rate;
+
+	if (!pwr->isense_clk_indx)
+		return -EINVAL;
+
+	rate = clk_round_rate(pwr->grp_clks[pwr->isense_clk_indx],
+		level > pwr->isense_clk_on_level ?
+		KGSL_XO_CLK_FREQ : KGSL_ISENSE_CLK_FREQ);
+	return clk_set_rate(pwr->grp_clks[pwr->isense_clk_indx], rate);
+}
+
+int kgsl_pwrctrl_init(struct kgsl_device *device)
+{
+	int i, k, m, n = 0, result;
+	struct platform_device *pdev = device->pdev;
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct device_node *ocmem_bus_node;
+	struct msm_bus_scale_pdata *ocmem_scale_table = NULL;
+	struct msm_bus_scale_pdata *bus_scale_table;
+	struct device_node *gpubw_dev_node = NULL;
+	struct platform_device *p2dev;
+
+	bus_scale_table = msm_bus_cl_get_pdata(device->pdev);
+	if (bus_scale_table == NULL)
+		return -EINVAL;
+
+	result = _get_clocks(device);
+	if (result)
+		return result;
+
+	/* Make sure we have a source clk for freq setting */
+	if (pwr->grp_clks[0] == NULL)
+		pwr->grp_clks[0] = pwr->grp_clks[1];
+
+	/* Getting gfx-bimc-interface-clk frequency */
+	if (!of_property_read_u32(pdev->dev.of_node,
+			"qcom,gpu-bimc-interface-clk-freq",
+			&pwr->gpu_bimc_int_clk_freq))
+		pwr->gpu_bimc_int_clk = devm_clk_get(&pdev->dev,
+					"bimc_gpu_clk");
+
+	if (of_property_read_bool(pdev->dev.of_node, "qcom,no-nap"))
+		device->pwrctrl.ctrl_flags |= BIT(KGSL_PWRFLAGS_NAP_OFF);
+
+	if (pwr->num_pwrlevels == 0) {
+		KGSL_PWR_ERR(device, "No power levels are defined\n");
+		return -EINVAL;
+	}
+
+	/* Initialize the user and thermal clock constraints */
+
+	pwr->max_pwrlevel = 0;
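+	/*
+	 * The last pwrlevel entry is the 'off' level, so the lowest
+	 * user-selectable level is num_pwrlevels - 2.
+	 */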
+	pwr->min_pwrlevel = pwr->num_pwrlevels - 2;
+	pwr->thermal_pwrlevel = 0;
+
+	pwr->wakeup_maxpwrlevel = 0;
+
+	for (i = 0; i < pwr->num_pwrlevels; i++) {
+		unsigned int freq = pwr->pwrlevels[i].gpu_freq;
+
+		if (freq > 0)
+			freq = clk_round_rate(pwr->grp_clks[0], freq);
+
+		pwr->pwrlevels[i].gpu_freq = freq;
+	}
+
+	clk_set_rate(pwr->grp_clks[0],
+		pwr->pwrlevels[pwr->num_pwrlevels - 1].gpu_freq);
+
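+	/* Run the RBBM timer clock (grp_clks[6]) at its fixed XO-derived rate */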
+	clk_set_rate(pwr->grp_clks[6],
+		clk_round_rate(pwr->grp_clks[6], KGSL_RBBMTIMER_CLK_FREQ));
+
+	_isense_clk_set_rate(pwr, pwr->num_pwrlevels - 1);
+
+	result = get_regulators(device);
+	if (result)
+		return result;
+
+	pwr->power_flags = 0;
+
+	kgsl_property_read_u32(device, "qcom,l2pc-cpu-mask",
+			&pwr->l2pc_cpus_mask);
+
+	pm_runtime_enable(&pdev->dev);
+
+	ocmem_bus_node = of_find_node_by_name(
+				device->pdev->dev.of_node,
+				"qcom,ocmem-bus-client");
+	/* If platform has split ocmem bus client - use it */
+	if (ocmem_bus_node) {
+		ocmem_scale_table = msm_bus_pdata_from_node
+				(device->pdev, ocmem_bus_node);
+		if (ocmem_scale_table)
+			pwr->ocmem_pcl = msm_bus_scale_register_client
+					(ocmem_scale_table);
+
+		if (!pwr->ocmem_pcl)
+			return -EINVAL;
+	}
+
+	/* Bus width in bytes, set it to zero if not found */
+	if (of_property_read_u32(pdev->dev.of_node, "qcom,bus-width",
+		&pwr->bus_width))
+		pwr->bus_width = 0;
+
+	/* Check if gpu bandwidth vote device is defined in dts */
+	if (pwr->bus_control)
+		gpubw_dev_node = of_parse_phandle(pdev->dev.of_node,
+					"qcom,gpubw-dev", 0);
+
+	/*
+	 * Governor support enables the gpu bus scaling via governor
+	 * and hence no need to register for bus scaling client
+	 * if gpubw-dev is defined.
+	 */
+	if (gpubw_dev_node) {
+		p2dev = of_find_device_by_node(gpubw_dev_node);
+		if (p2dev)
+			pwr->devbw = &p2dev->dev;
+	} else {
+		/*
+		 * Register for gpu bus scaling if governor support
+		 * is not enabled and gpu bus voting is to be done
+		 * from the driver.
+		 */
+		pwr->pcl = msm_bus_scale_register_client(bus_scale_table);
+		if (pwr->pcl == 0)
+			return -EINVAL;
+	}
+
+	pwr->bus_ib = kzalloc(bus_scale_table->num_usecases *
+		sizeof(*pwr->bus_ib), GFP_KERNEL);
+	if (pwr->bus_ib == NULL)
+		return -ENOMEM;
+
+	/*
+	 * Pull the BW votes out of the bus table.  They will be used to
+	 * calculate the ratio between the votes.
+	 */
+	for (i = 0; i < bus_scale_table->num_usecases; i++) {
+		struct msm_bus_paths *usecase =
+				&bus_scale_table->usecase[i];
+		struct msm_bus_vectors *vector = &usecase->vectors[0];
+
+		if (vector->dst == MSM_BUS_SLAVE_EBI_CH0 &&
+				vector->ib != 0) {
+
+			if (i < KGSL_MAX_BUSLEVELS) {
+				/* Convert bytes to Mbytes. */
+				ib_votes[i] =
+					DIV_ROUND_UP_ULL(vector->ib, 1048576)
+					- 1;
+				if (ib_votes[i] > ib_votes[max_vote_buslevel])
+					max_vote_buslevel = i;
+			}
+
+			/* check for duplicate values */
+			for (k = 0; k < n; k++)
+				if (vector->ib == pwr->bus_ib[k])
+					break;
+
+			/* if this is a new ib value, save it */
+			if (k == n) {
+				pwr->bus_ib[k] = vector->ib;
+				n++;
+				/* find which pwrlevels use this ib */
+				for (m = 0; m < pwr->num_pwrlevels - 1; m++) {
+					if (bus_scale_table->
+						usecase[pwr->pwrlevels[m].
+						bus_freq].vectors[0].ib
+						== vector->ib)
+						pwr->bus_index[m] = k;
+				}
+			}
+		}
+	}
+
+	INIT_WORK(&pwr->thermal_cycle_ws, kgsl_thermal_cycle);
+	setup_timer(&pwr->thermal_timer, kgsl_thermal_timer,
+			(unsigned long) device);
+
+	INIT_LIST_HEAD(&pwr->limits);
+	spin_lock_init(&pwr->limits_lock);
+	pwr->sysfs_pwr_limit = kgsl_pwr_limits_add(KGSL_DEVICE_3D0);
+
+	kgsl_pwrctrl_vbif_init();
+
+	/* temperature sensor name */
+	of_property_read_string(pdev->dev.of_node, "qcom,tzone-name",
+		&pwr->tzone_name);
+
+	return result;
+}
+
+void kgsl_pwrctrl_close(struct kgsl_device *device)
+{
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	int i;
+
+	KGSL_PWR_INFO(device, "close device %d\n", device->id);
+
+	pm_runtime_disable(&device->pdev->dev);
+
+	if (pwr->pcl)
+		msm_bus_scale_unregister_client(pwr->pcl);
+
+	pwr->pcl = 0;
+
+	if (pwr->ocmem_pcl)
+		msm_bus_scale_unregister_client(pwr->ocmem_pcl);
+
+	pwr->ocmem_pcl = 0;
+
+	for (i = 0; i < KGSL_MAX_REGULATORS; i++)
+		pwr->regulators[i].reg = NULL;
+
+	for (i = 0; i < KGSL_MAX_REGULATORS; i++)
+		pwr->grp_clks[i] = NULL;
+
+	if (pwr->gpu_bimc_int_clk)
+		devm_clk_put(&device->pdev->dev, pwr->gpu_bimc_int_clk);
+
+	pwr->power_flags = 0;
+
+	if (!IS_ERR_OR_NULL(pwr->sysfs_pwr_limit)) {
+		list_del(&pwr->sysfs_pwr_limit->node);
+		kfree(pwr->sysfs_pwr_limit);
+		pwr->sysfs_pwr_limit = NULL;
+	}
+	kfree(pwr->bus_ib);
+}
+
+/**
+ * kgsl_idle_check() - Work function for GPU interrupts and idle timeouts.
+ * @device: The device
+ *
+ * This function is called for work that is queued by the interrupt
+ * handler or the idle timer. It attempts to transition to a clocks
+ * off state if the active_cnt is 0 and the hardware is idle.
+ */
+void kgsl_idle_check(struct work_struct *work)
+{
+	struct kgsl_device *device = container_of(work, struct kgsl_device,
+							idle_check_ws);
+	int ret = 0;
+	unsigned int requested_state;
+
+	mutex_lock(&device->mutex);
+
+	requested_state = device->requested_state;
+
+	if (device->state == KGSL_STATE_ACTIVE
+		   || device->state ==  KGSL_STATE_NAP) {
+
+		if (!atomic_read(&device->active_cnt)) {
+			ret = kgsl_pwrctrl_change_state(device,
+					device->requested_state);
+			if (ret == -EBUSY) {
+				/*
+				 * If the GPU is currently busy, restore
+				 * the requested state and reschedule
+				 * idle work.
+				 */
+				kgsl_pwrctrl_request_state(device,
+					requested_state);
+				kgsl_schedule_work(&device->idle_check_ws);
+			}
+		}
+
+		if (!ret)
+			kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE);
+
+		if (device->state == KGSL_STATE_ACTIVE)
+			mod_timer(&device->idle_timer,
+					jiffies +
+					device->pwrctrl.interval_timeout);
+	}
+	kgsl_pwrscale_update(device);
+	mutex_unlock(&device->mutex);
+}
+EXPORT_SYMBOL(kgsl_idle_check);
+
+void kgsl_timer(unsigned long data)
+{
+	struct kgsl_device *device = (struct kgsl_device *) data;
+
+	KGSL_PWR_INFO(device, "idle timer expired device %d\n", device->id);
+	if (device->requested_state != KGSL_STATE_SUSPEND) {
+		kgsl_pwrctrl_request_state(device, KGSL_STATE_SLUMBER);
+		/* Have work run in a non-interrupt context. */
+		kgsl_schedule_work(&device->idle_check_ws);
+	}
+}
+
+static bool kgsl_pwrctrl_isenabled(struct kgsl_device *device)
+{
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+
+	return ((test_bit(KGSL_PWRFLAGS_CLK_ON, &pwr->power_flags) != 0) &&
+		(test_bit(KGSL_PWRFLAGS_AXI_ON, &pwr->power_flags) != 0));
+}
+
+/**
+ * kgsl_pre_hwaccess - Enforce preconditions for touching registers
+ * @device: The device
+ *
+ * This function ensures that the correct lock is held and that the GPU
+ * clock is on immediately before a register is read or written. Note
+ * that this function does not check active_cnt because the registers
+ * must be accessed during device start and stop, when the active_cnt
+ * may legitimately be 0.
+ */
+void kgsl_pre_hwaccess(struct kgsl_device *device)
+{
+	/* In order to touch a register you must hold the device mutex */
+	WARN_ON(!mutex_is_locked(&device->mutex));
+
+	/* A register access without device power will cause a fatal timeout */
+	BUG_ON(!kgsl_pwrctrl_isenabled(device));
+}
+EXPORT_SYMBOL(kgsl_pre_hwaccess);
+
+static int kgsl_pwrctrl_enable(struct kgsl_device *device)
+{
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	int level, status;
+
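+	/*
+	 * Pick the wakeup power level: the maximum if a max-level wakeup was
+	 * requested, the previous active level if POPP is pushing, otherwise
+	 * the default level.
+	 */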
+	if (pwr->wakeup_maxpwrlevel) {
+		level = pwr->max_pwrlevel;
+		pwr->wakeup_maxpwrlevel = 0;
+	} else if (kgsl_popp_check(device)) {
+		level = pwr->active_pwrlevel;
+	} else {
+		level = pwr->default_pwrlevel;
+	}
+
+	kgsl_pwrctrl_pwrlevel_change(device, level);
+
+	/* Order pwrrail/clk sequence based upon platform */
+	status = kgsl_pwrctrl_pwrrail(device, KGSL_PWRFLAGS_ON);
+	if (status)
+		return status;
+	kgsl_pwrctrl_clk(device, KGSL_PWRFLAGS_ON, KGSL_STATE_ACTIVE);
+	kgsl_pwrctrl_axi(device, KGSL_PWRFLAGS_ON);
+	return device->ftbl->regulator_enable(device);
+}
+
+static void kgsl_pwrctrl_disable(struct kgsl_device *device)
+{
+	/* Order pwrrail/clk sequence based upon platform */
+	device->ftbl->regulator_disable(device);
+	kgsl_pwrctrl_axi(device, KGSL_PWRFLAGS_OFF);
+	kgsl_pwrctrl_clk(device, KGSL_PWRFLAGS_OFF, KGSL_STATE_SLUMBER);
+	kgsl_pwrctrl_pwrrail(device, KGSL_PWRFLAGS_OFF);
+}
+
+/**
+ * _init() - Get the GPU ready to start, but don't turn anything on
+ * @device - Pointer to the kgsl_device struct
+ */
+static int _init(struct kgsl_device *device)
+{
+	int status = 0;
+
+	switch (device->state) {
+	case KGSL_STATE_NAP:
+		/* Force power on to do the stop */
+		status = kgsl_pwrctrl_enable(device);
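+		/* fall through */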
+	case KGSL_STATE_ACTIVE:
+		kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF);
+		del_timer_sync(&device->idle_timer);
+		device->ftbl->stop(device);
+		/* fall through */
+	case KGSL_STATE_AWARE:
+		kgsl_pwrctrl_disable(device);
+		/* fall through */
+	case KGSL_STATE_SLUMBER:
+	case KGSL_STATE_NONE:
+		kgsl_pwrctrl_set_state(device, KGSL_STATE_INIT);
+	}
+
+	return status;
+}
+
+/**
+ * _wake() - Power up the GPU from a slumber state
+ * @device - Pointer to the kgsl_device struct
+ *
+ * Resume the GPU from a lower power state to ACTIVE.
+ */
+static int _wake(struct kgsl_device *device)
+{
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	int status = 0;
+
+	switch (device->state) {
+	case KGSL_STATE_SUSPEND:
+		complete_all(&device->hwaccess_gate);
+		/* Call the GPU specific resume function */
+		device->ftbl->resume(device);
+		/* fall through */
+	case KGSL_STATE_SLUMBER:
+		status = device->ftbl->start(device,
+				device->pwrctrl.superfast);
+		device->pwrctrl.superfast = false;
+
+		if (status) {
+			kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE);
+			KGSL_DRV_ERR(device, "start failed %d\n", status);
+			break;
+		}
+		kgsl_pwrctrl_axi(device, KGSL_PWRFLAGS_ON);
+		kgsl_pwrscale_wake(device);
+		kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_ON);
+		/* fall through */
+	case KGSL_STATE_NAP:
+		/* Turn on the core clocks */
+		kgsl_pwrctrl_clk(device, KGSL_PWRFLAGS_ON, KGSL_STATE_ACTIVE);
+
+		/*
+		 * No need to turn on/off irq here as it no longer affects
+		 * power collapse
+		 */
+		kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE);
+
+		/*
+		 * Change register settings if any after pwrlevel change.
+		 * If there was dcvs level change during nap - call
+		 * pre and post in the row after clock is enabled.
+		 */
+		kgsl_pwrctrl_pwrlevel_change_settings(device, 0);
+		kgsl_pwrctrl_pwrlevel_change_settings(device, 1);
+		/* All settings for power level transitions are complete */
+		pwr->previous_pwrlevel = pwr->active_pwrlevel;
+		mod_timer(&device->idle_timer, jiffies +
+				device->pwrctrl.interval_timeout);
+		break;
+	case KGSL_STATE_AWARE:
+		/* Enable state before turning on irq */
+		kgsl_pwrctrl_set_state(device, KGSL_STATE_ACTIVE);
+		kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_ON);
+		mod_timer(&device->idle_timer, jiffies +
+				device->pwrctrl.interval_timeout);
+		break;
+	default:
+		KGSL_PWR_WARN(device, "unhandled state %s\n",
+				kgsl_pwrstate_to_str(device->state));
+		kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE);
+		status = -EINVAL;
+		break;
+	}
+	return status;
+}
+
+/*
+ * _aware() - Put device into AWARE
+ * @device: Device pointer
+ *
+ * The GPU should be available for register reads/writes and able
+ * to communicate with the rest of the system.  However disable all
+ * paths that allow a switch to an interrupt context (interrupts &
+ * timers).
+ * Return 0 on success else error code
+ */
+static int
+_aware(struct kgsl_device *device)
+{
+	int status = 0;
+
+	switch (device->state) {
+	case KGSL_STATE_INIT:
+		status = kgsl_pwrctrl_enable(device);
+		break;
+	/* The following 3 cases shouldn't occur, but don't panic. */
+	case KGSL_STATE_NAP:
+		status = _wake(device);
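+		/* fall through */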
+	case KGSL_STATE_ACTIVE:
+		kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF);
+		del_timer_sync(&device->idle_timer);
+		break;
+	case KGSL_STATE_SLUMBER:
+		status = kgsl_pwrctrl_enable(device);
+		break;
+	default:
+		status = -EINVAL;
+	}
+	if (status)
+		kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE);
+	else
+		kgsl_pwrctrl_set_state(device, KGSL_STATE_AWARE);
+	return status;
+}
+
+static int
+_nap(struct kgsl_device *device)
+{
+	switch (device->state) {
+	case KGSL_STATE_ACTIVE:
+		if (!device->ftbl->is_hw_collapsible(device)) {
+			kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE);
+			return -EBUSY;
+		}
+
+		/*
+		 * Read HW busy counters before going to NAP state.
+		 * The data might be used by power scale governors
+		 * independently of the HW activity. For example
+		 * the simple-on-demand governor will get the latest
+		 * busy_time data even if the gpu isn't active.
+		 */
+		kgsl_pwrscale_update_stats(device);
+
+		kgsl_pwrctrl_clk(device, KGSL_PWRFLAGS_OFF, KGSL_STATE_NAP);
+		kgsl_pwrctrl_set_state(device, KGSL_STATE_NAP);
+		/* fallthrough */
+	case KGSL_STATE_SLUMBER:
+		break;
+	case KGSL_STATE_AWARE:
+		KGSL_PWR_WARN(device,
+			"transition AWARE -> NAP is not permitted\n");
+		/* fallthrough */
+	default:
+		kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE);
+		break;
+	}
+	return 0;
+}
+
+static int
+_slumber(struct kgsl_device *device)
+{
+	int status = 0;
+
+	switch (device->state) {
+	case KGSL_STATE_ACTIVE:
+		if (!device->ftbl->is_hw_collapsible(device)) {
+			kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE);
+			return -EBUSY;
+		}
+		/* fall through */
+	case KGSL_STATE_NAP:
+		del_timer_sync(&device->idle_timer);
+		if (device->pwrctrl.thermal_cycle == CYCLE_ACTIVE) {
+			device->pwrctrl.thermal_cycle = CYCLE_ENABLE;
+			del_timer_sync(&device->pwrctrl.thermal_timer);
+		}
+		kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF);
+		/* make sure power is on to stop the device*/
+		status = kgsl_pwrctrl_enable(device);
+		device->ftbl->suspend_context(device);
+		device->ftbl->stop(device);
+		kgsl_pwrctrl_disable(device);
+		kgsl_pwrscale_sleep(device);
+		kgsl_pwrctrl_irq(device, KGSL_PWRFLAGS_OFF);
+		kgsl_pwrctrl_set_state(device, KGSL_STATE_SLUMBER);
+		pm_qos_update_request(&device->pwrctrl.pm_qos_req_dma,
+						PM_QOS_DEFAULT_VALUE);
+		if (device->pwrctrl.l2pc_cpus_mask)
+			pm_qos_update_request(
+					&device->pwrctrl.l2pc_cpus_qos,
+					PM_QOS_DEFAULT_VALUE);
+		break;
+	case KGSL_STATE_SUSPEND:
+		complete_all(&device->hwaccess_gate);
+		device->ftbl->resume(device);
+		kgsl_pwrctrl_set_state(device, KGSL_STATE_SLUMBER);
+		break;
+	case KGSL_STATE_AWARE:
+		kgsl_pwrctrl_disable(device);
+		kgsl_pwrctrl_set_state(device, KGSL_STATE_SLUMBER);
+		break;
+	default:
+		kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE);
+		break;
+
+	}
+	return status;
+}
+
+/*
+ * _suspend() - Put device into suspend
+ * @device: Device pointer
+ *
+ * Return 0 on success else error code
+ */
+static int _suspend(struct kgsl_device *device)
+{
+	int ret = 0;
+
+	if ((device->state == KGSL_STATE_NONE) ||
+			(device->state == KGSL_STATE_INIT))
+		return ret;
+
+	/* drain to prevent from more commands being submitted */
+	device->ftbl->drain(device);
+	/* wait for active count so device can be put in slumber */
+	ret = kgsl_active_count_wait(device, 0);
+	if (ret)
+		goto err;
+
+	ret = device->ftbl->idle(device);
+	if (ret)
+		goto err;
+
+	ret = _slumber(device);
+	if (ret)
+		goto err;
+
+	kgsl_pwrctrl_set_state(device, KGSL_STATE_SUSPEND);
+	return ret;
+
+err:
+	device->ftbl->resume(device);
+	KGSL_PWR_ERR(device, "device failed to SUSPEND %d\n", ret);
+	return ret;
+}
+
+/*
+ * kgsl_pwrctrl_change_state() - Change the GPU state to the requested state
+ * @device: Pointer to a KGSL device
+ * @state: desired KGSL state
+ *
+ * Caller must hold the device mutex. If the requested state change
+ * is valid, execute it.  Otherwise return an error code explaining
+ * why the change has not taken place.  Also print an error if an
+ * unexpected state change failure occurs.  For example, a change to
+ * NAP may be rejected because the GPU is busy; this is not an error.
+ * A change to SUSPEND should go through no matter what, so if it
+ * fails an additional error message will be printed to dmesg.
+ */
+int kgsl_pwrctrl_change_state(struct kgsl_device *device, int state)
+{
+	int status = 0;
+
+	if (device->state == state)
+		return status;
+	kgsl_pwrctrl_request_state(device, state);
+
+	/* Work through the legal state transitions */
+	switch (state) {
+	case KGSL_STATE_INIT:
+		status = _init(device);
+		break;
+	case KGSL_STATE_AWARE:
+		status = _aware(device);
+		break;
+	case KGSL_STATE_ACTIVE:
+		status = _wake(device);
+		break;
+	case KGSL_STATE_NAP:
+		status = _nap(device);
+		break;
+	case KGSL_STATE_SLUMBER:
+		status = _slumber(device);
+		break;
+	case KGSL_STATE_SUSPEND:
+		status = _suspend(device);
+		break;
+	default:
+		KGSL_PWR_INFO(device, "bad state request 0x%x\n", state);
+		kgsl_pwrctrl_request_state(device, KGSL_STATE_NONE);
+		status = -EINVAL;
+		break;
+	}
+
+	/* Record the state timing info */
+	if (!status) {
+		ktime_t t = ktime_get();
+
+		_record_pwrevent(device, t, KGSL_PWREVENT_STATE);
+	}
+	return status;
+}
+EXPORT_SYMBOL(kgsl_pwrctrl_change_state);
+
+static void kgsl_pwrctrl_set_state(struct kgsl_device *device,
+				unsigned int state)
+{
+	trace_kgsl_pwr_set_state(device, state);
+	device->state = state;
+	device->requested_state = KGSL_STATE_NONE;
+}
+
+static void kgsl_pwrctrl_request_state(struct kgsl_device *device,
+				unsigned int state)
+{
+	if (state != KGSL_STATE_NONE && state != device->requested_state)
+		trace_kgsl_pwr_request_state(device, state);
+	device->requested_state = state;
+}
+
+const char *kgsl_pwrstate_to_str(unsigned int state)
+{
+	switch (state) {
+	case KGSL_STATE_NONE:
+		return "NONE";
+	case KGSL_STATE_INIT:
+		return "INIT";
+	case KGSL_STATE_AWARE:
+		return "AWARE";
+	case KGSL_STATE_ACTIVE:
+		return "ACTIVE";
+	case KGSL_STATE_NAP:
+		return "NAP";
+	case KGSL_STATE_SUSPEND:
+		return "SUSPEND";
+	case KGSL_STATE_SLUMBER:
+		return "SLUMBER";
+	default:
+		break;
+	}
+	return "UNKNOWN";
+}
+EXPORT_SYMBOL(kgsl_pwrstate_to_str);
+
+
+/**
+ * kgsl_active_count_get() - Increase the device active count
+ * @device: Pointer to a KGSL device
+ *
+ * Increase the active count for the KGSL device and turn on
+ * clocks if this is the first reference. Code paths that need
+ * to touch the hardware or wait for the hardware to complete
+ * an operation must hold an active count reference until they
+ * are finished. An error code will be returned if waking the
+ * device fails. The device mutex must be held while calling
+ * this function.
+ */
+int kgsl_active_count_get(struct kgsl_device *device)
+{
+	int ret = 0;
+
+	if (WARN_ON(!mutex_is_locked(&device->mutex)))
+		return -EINVAL;
+
+	if ((atomic_read(&device->active_cnt) == 0) &&
+		(device->state != KGSL_STATE_ACTIVE)) {
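+		/*
+		 * Drop the mutex while waiting for the hwaccess gate, which
+		 * is opened when the device leaves the SUSPEND state.
+		 */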
+		mutex_unlock(&device->mutex);
+		wait_for_completion(&device->hwaccess_gate);
+		mutex_lock(&device->mutex);
+		device->pwrctrl.superfast = true;
+		ret = kgsl_pwrctrl_change_state(device, KGSL_STATE_ACTIVE);
+	}
+	if (ret == 0)
+		atomic_inc(&device->active_cnt);
+	trace_kgsl_active_count(device,
+		(unsigned long) __builtin_return_address(0));
+	return ret;
+}
+EXPORT_SYMBOL(kgsl_active_count_get);
+
+/**
+ * kgsl_active_count_put() - Decrease the device active count
+ * @device: Pointer to a KGSL device
+ *
+ * Decrease the active count for the KGSL device and turn off
+ * clocks if there are no remaining references. This function will
+ * transition the device to NAP if there are no other pending state
+ * changes. It also completes the suspend gate.  The device mutex must
+ * be held while calling this function.
+ */
+void kgsl_active_count_put(struct kgsl_device *device)
+{
+	if (WARN_ON(!mutex_is_locked(&device->mutex)))
+		return;
+
+	if (WARN(atomic_read(&device->active_cnt) == 0,
+			"Unbalanced get/put calls to KGSL active count\n"))
+		return;
+
+	if (atomic_dec_and_test(&device->active_cnt)) {
+		bool nap_on = !(device->pwrctrl.ctrl_flags &
+			BIT(KGSL_PWRFLAGS_NAP_OFF));
+		if (nap_on && device->state == KGSL_STATE_ACTIVE &&
+			device->requested_state == KGSL_STATE_NONE) {
+			kgsl_pwrctrl_request_state(device, KGSL_STATE_NAP);
+			kgsl_schedule_work(&device->idle_check_ws);
+		} else if (!nap_on) {
+			kgsl_pwrscale_update_stats(device);
+			kgsl_pwrscale_update(device);
+		}
+
+		mod_timer(&device->idle_timer,
+			jiffies + device->pwrctrl.interval_timeout);
+	}
+
+	trace_kgsl_active_count(device,
+		(unsigned long) __builtin_return_address(0));
+
+	wake_up(&device->active_cnt_wq);
+}
+EXPORT_SYMBOL(kgsl_active_count_put);
+
+static int _check_active_count(struct kgsl_device *device, int count)
+{
+	/* Return 0 if the active count is greater than the desired value */
+	return atomic_read(&device->active_cnt) > count ? 0 : 1;
+}
+
+/**
+ * kgsl_active_count_wait() - Wait for activity to finish.
+ * @device: Pointer to a KGSL device
+ * @count: Active count value to wait for
+ *
+ * Block until the active_cnt value hits the desired value
+ */
+int kgsl_active_count_wait(struct kgsl_device *device, int count)
+{
+	int result = 0;
+	long wait_jiffies = HZ;
+
+	if (WARN_ON(!mutex_is_locked(&device->mutex)))
+		return -EINVAL;
+
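+	/*
+	 * Drop the mutex while waiting so active count holders can make
+	 * progress; carry the remaining timeout across iterations.
+	 */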
+	while (atomic_read(&device->active_cnt) > count) {
+		long ret;
+
+		mutex_unlock(&device->mutex);
+		ret = wait_event_timeout(device->active_cnt_wq,
+			_check_active_count(device, count), wait_jiffies);
+		mutex_lock(&device->mutex);
+		result = ret == 0 ? -ETIMEDOUT : 0;
+		if (!result)
+			wait_jiffies = ret;
+		else
+			break;
+	}
+
+	return result;
+}
+EXPORT_SYMBOL(kgsl_active_count_wait);
+
+/**
+ * _update_limits() - update the limits based on the current requests
+ * @limit: Pointer to the limits structure
+ * @reason: Reason for the update
+ * @level: Level if any to be set
+ *
+ * Set the thermal pwrlevel based on the current limits
+ */
+static void _update_limits(struct kgsl_pwr_limit *limit, unsigned int reason,
+							unsigned int level)
+{
+	struct kgsl_device *device = limit->device;
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct kgsl_pwr_limit *temp_limit;
+	unsigned int max_level = 0;
+
+	spin_lock(&pwr->limits_lock);
+	switch (reason) {
+	case KGSL_PWR_ADD_LIMIT:
+		list_add(&limit->node, &pwr->limits);
+		break;
+	case KGSL_PWR_DEL_LIMIT:
+		list_del(&limit->node);
+		if (list_empty(&pwr->limits))
+			goto done;
+		break;
+	case KGSL_PWR_SET_LIMIT:
+		limit->level = level;
+		break;
+	default:
+		break;
+	}
+
+	list_for_each_entry(temp_limit, &pwr->limits, node) {
+		max_level = max_t(unsigned int, max_level, temp_limit->level);
+	}
+
+done:
+	spin_unlock(&pwr->limits_lock);
+
+	mutex_lock(&device->mutex);
+	pwr->thermal_pwrlevel = max_level;
+	kgsl_pwrctrl_pwrlevel_change(device, pwr->active_pwrlevel);
+	mutex_unlock(&device->mutex);
+}
+
+/**
+ * kgsl_pwr_limits_add() - Add a new pwr limit
+ * @id: Device ID
+ *
+ * Allocate a pwr limit structure for the client, add it to the limits
+ * list and return the pointer to the client
+ */
+void *kgsl_pwr_limits_add(enum kgsl_deviceid id)
+{
+	struct kgsl_device *device = kgsl_get_device(id);
+	struct kgsl_pwr_limit *limit;
+
+	if (IS_ERR_OR_NULL(device))
+		return NULL;
+
+	limit = kzalloc(sizeof(struct kgsl_pwr_limit),
+						GFP_KERNEL);
+	if (limit == NULL)
+		return ERR_PTR(-ENOMEM);
+	limit->device = device;
+
+	_update_limits(limit, KGSL_PWR_ADD_LIMIT, 0);
+	return limit;
+}
+EXPORT_SYMBOL(kgsl_pwr_limits_add);
+
+/**
+ * kgsl_pwr_limits_del() - Unregister the pwr limit client and
+ * adjust the thermal limits
+ * @limit_ptr: Client handle
+ *
+ * Delete the client handle from the thermal list and adjust the
+ * active clocks if needed.
+ */
+void kgsl_pwr_limits_del(void *limit_ptr)
+{
+	struct kgsl_pwr_limit *limit = limit_ptr;
+
+	if (IS_ERR(limit))
+		return;
+
+	_update_limits(limit, KGSL_PWR_DEL_LIMIT, 0);
+	kfree(limit);
+}
+EXPORT_SYMBOL(kgsl_pwr_limits_del);
+
+/**
+ * kgsl_pwr_limits_set_freq() - Set the requested limit for the client
+ * @limit_ptr: Client handle
+ * @freq: Client requested frequency
+ *
+ * Set the new limit for the client and adjust the clocks
+ */
+int kgsl_pwr_limits_set_freq(void *limit_ptr, unsigned int freq)
+{
+	struct kgsl_pwrctrl *pwr;
+	struct kgsl_pwr_limit *limit = limit_ptr;
+	int level;
+
+	if (IS_ERR(limit))
+		return -EINVAL;
+
+	pwr = &limit->device->pwrctrl;
+	level = _get_nearest_pwrlevel(pwr, freq);
+	if (level < 0)
+		return -EINVAL;
+	_update_limits(limit, KGSL_PWR_SET_LIMIT, level);
+	return 0;
+}
+EXPORT_SYMBOL(kgsl_pwr_limits_set_freq);
+
+/**
+ * kgsl_pwr_limits_set_default() - Set the default thermal limit for the client
+ * @limit_ptr: Client handle
+ *
+ * Set the default for the client and adjust the clocks
+ */
+void kgsl_pwr_limits_set_default(void *limit_ptr)
+{
+	struct kgsl_pwr_limit *limit = limit_ptr;
+
+	if (IS_ERR(limit))
+		return;
+
+	_update_limits(limit, KGSL_PWR_SET_LIMIT, 0);
+}
+EXPORT_SYMBOL(kgsl_pwr_limits_set_default);
+
+/**
+ * kgsl_pwr_limits_get_freq() - Get the current limit
+ * @id: Device ID
+ *
+ * Get the current limit set for the device
+ */
+unsigned int kgsl_pwr_limits_get_freq(enum kgsl_deviceid id)
+{
+	struct kgsl_device *device = kgsl_get_device(id);
+	struct kgsl_pwrctrl *pwr;
+	unsigned int freq;
+
+	if (IS_ERR_OR_NULL(device))
+		return 0;
+	pwr = &device->pwrctrl;
+	mutex_lock(&device->mutex);
+	freq = pwr->pwrlevels[pwr->thermal_pwrlevel].gpu_freq;
+	mutex_unlock(&device->mutex);
+
+	return freq;
+}
+EXPORT_SYMBOL(kgsl_pwr_limits_get_freq);
diff --git a/drivers/gpu/msm/kgsl_pwrctrl.h b/drivers/gpu/msm/kgsl_pwrctrl.h
new file mode 100644
index 0000000..58f16e8
--- /dev/null
+++ b/drivers/gpu/msm/kgsl_pwrctrl.h
@@ -0,0 +1,248 @@
+/* Copyright (c) 2010-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef __KGSL_PWRCTRL_H
+#define __KGSL_PWRCTRL_H
+
+#include <linux/pm_qos.h>
+
+/*****************************************************************************
+ * power flags
+ ****************************************************************************/
+#define KGSL_PWRFLAGS_ON   1
+#define KGSL_PWRFLAGS_OFF  0
+
+#define KGSL_PWRLEVEL_TURBO 0
+
+#define KGSL_PWR_ON	0xFFFF
+
+#define KGSL_MAX_CLKS 14
+#define KGSL_MAX_REGULATORS 2
+
+#define KGSL_MAX_PWRLEVELS 10
+
+/* Only two supported levels, min & max */
+#define KGSL_CONSTRAINT_PWR_MAXLEVELS 2
+
+#define KGSL_XO_CLK_FREQ	19200000
+#define KGSL_RBBMTIMER_CLK_FREQ	KGSL_XO_CLK_FREQ
+#define KGSL_ISENSE_CLK_FREQ	200000000
+
+/* Symbolic table for the constraint type */
+#define KGSL_CONSTRAINT_TYPES \
+	{ KGSL_CONSTRAINT_NONE, "None" }, \
+	{ KGSL_CONSTRAINT_PWRLEVEL, "Pwrlevel" }
+/* Symbolic table for the constraint sub type */
+#define KGSL_CONSTRAINT_PWRLEVEL_SUBTYPES \
+	{ KGSL_CONSTRAINT_PWR_MIN, "Min" }, \
+	{ KGSL_CONSTRAINT_PWR_MAX, "Max" }
+
+#define KGSL_PWR_ADD_LIMIT 0
+#define KGSL_PWR_DEL_LIMIT 1
+#define KGSL_PWR_SET_LIMIT 2
+
+enum kgsl_pwrctrl_timer_type {
+	KGSL_PWR_IDLE_TIMER,
+};
+
+/*
+ * States for thermal cycling.  _DISABLE means that no cycling has been
+ * requested.  _ENABLE means that cycling has been requested, but GPU
+ * DCVS is currently recommending running at a lower frequency than the
+ * cycle frequency.  _ACTIVE means that the frequency is actively being
+ * cycled.
+ */
+#define CYCLE_DISABLE	0
+#define CYCLE_ENABLE	1
+#define CYCLE_ACTIVE	2
+
+struct platform_device;
+
+struct kgsl_clk_stats {
+	unsigned int busy;
+	unsigned int total;
+	unsigned int busy_old;
+	unsigned int total_old;
+};
+
+struct kgsl_pwr_constraint {
+	unsigned int type;
+	unsigned int sub_type;
+	union {
+		struct {
+			unsigned int level;
+		} pwrlevel;
+	} hint;
+	unsigned long expires;
+	uint32_t owner_id;
+};
+
+/**
+ * struct kgsl_pwrlevel - Struct holding different pwrlevel info obtained from
+ * the dtsi file
+ * @gpu_freq:          GPU frequency vote in Hz
+ * @bus_freq:          Bus bandwidth vote index
+ * @bus_min:           Min bus index @gpu_freq
+ * @bus_max:           Max bus index @gpu_freq
+ */
+struct kgsl_pwrlevel {
+	unsigned int gpu_freq;
+	unsigned int bus_freq;
+	unsigned int bus_min;
+	unsigned int bus_max;
+};
+
+struct kgsl_regulator {
+	struct regulator *reg;
+	char name[8];
+};
+
+/**
+ * struct kgsl_pwrctrl - Power control settings for a KGSL device
+ * @interrupt_num - The interrupt number for the device
+ * @grp_clks - Array of clocks structures that we control
+ * @power_flags - Control flags for power
+ * @pwrlevels - List of supported power levels
+ * @active_pwrlevel - The currently active power level
+ * @previous_pwrlevel - The power level before transition
+ * @thermal_pwrlevel - maximum powerlevel constraint from thermal
+ * @default_pwrlevel - device wake up power level
+ * @max_pwrlevel - maximum allowable powerlevel per the user
+ * @min_pwrlevel - minimum allowable powerlevel per the user
+ * @num_pwrlevels - number of available power levels
+ * @interval_timeout - timeout in jiffies to be idle before a power event
+ * @clock_times - Each GPU frequency's accumulated active time in us
+ * @regulators - array of pointers to kgsl_regulator structs
+ * @pcl - bus scale identifier
+ * @ocmem_pcl - ocmem bus scale identifier
+ * @irq_name - resource name for the IRQ
+ * @clk_stats - structure of clock statistics
+ * @l2pc_cpus_mask - mask to avoid L2PC on masked CPUs
+ * @l2pc_cpus_qos - qos structure to avoid L2PC on CPUs
+ * @pm_qos_req_dma - the power management quality of service structure
+ * @pm_qos_active_latency - allowed CPU latency in microseconds when active
+ * @pm_qos_cpu_mask_latency - allowed CPU mask latency in microseconds
+ * @pm_qos_wakeup_latency - allowed CPU latency in microseconds during wakeup
+ * @bus_control - true if the bus calculation is independent
+ * @bus_mod - modifier from the current power level for the bus vote
+ * @bus_percent_ab - current percent of total possible bus usage
+ * @bus_width - target specific bus width in number of bytes
+ * @bus_ab_mbytes - AB vote in Mbytes for current bus usage
+ * @bus_index - default bus index into the bus_ib table
+ * @bus_ib - the set of unique ib requests needed for the bus calculation
+ * @constraint - currently active power constraint
+ * @superfast - Boolean flag to indicate that the GPU start should be run in the
+ * higher priority thread
+ * @thermal_cycle_ws - Work struct for scheduling thermal cycling
+ * @thermal_timer - Timer for thermal cycling
+ * @thermal_timeout - Cycling timeout for switching between frequencies
+ * @thermal_cycle - Is thermal cycling enabled
+ * @thermal_highlow - flag for switching between high and low frequency
+ * @limits - list head for limits
+ * @limits_lock - spin lock to protect limits list
+ * @sysfs_pwr_limit - pointer to the sysfs limits node
+ * @isense_clk_indx - index of isense clock, 0 if no isense
+ * @isense_clk_on_level - isense clock rate is XO rate below this level.
+ * @tzone_name - pointer to thermal zone name of GPU temperature sensor
+ */
+
+struct kgsl_pwrctrl {
+	int interrupt_num;
+	struct clk *grp_clks[KGSL_MAX_CLKS];
+	struct clk *gpu_bimc_int_clk;
+	int isense_clk_indx;
+	int isense_clk_on_level;
+	unsigned long power_flags;
+	unsigned long ctrl_flags;
+	struct kgsl_pwrlevel pwrlevels[KGSL_MAX_PWRLEVELS];
+	unsigned int active_pwrlevel;
+	unsigned int previous_pwrlevel;
+	unsigned int thermal_pwrlevel;
+	unsigned int default_pwrlevel;
+	unsigned int wakeup_maxpwrlevel;
+	unsigned int max_pwrlevel;
+	unsigned int min_pwrlevel;
+	unsigned int num_pwrlevels;
+	unsigned long interval_timeout;
+	u64 clock_times[KGSL_MAX_PWRLEVELS];
+	struct kgsl_regulator regulators[KGSL_MAX_REGULATORS];
+	uint32_t pcl;
+	uint32_t ocmem_pcl;
+	const char *irq_name;
+	struct kgsl_clk_stats clk_stats;
+	unsigned int l2pc_cpus_mask;
+	struct pm_qos_request l2pc_cpus_qos;
+	struct pm_qos_request pm_qos_req_dma;
+	unsigned int pm_qos_active_latency;
+	unsigned int pm_qos_cpu_mask_latency;
+	unsigned int pm_qos_wakeup_latency;
+	bool bus_control;
+	int bus_mod;
+	unsigned int bus_percent_ab;
+	unsigned int bus_width;
+	unsigned long bus_ab_mbytes;
+	struct device *devbw;
+	unsigned int bus_index[KGSL_MAX_PWRLEVELS];
+	uint64_t *bus_ib;
+	struct kgsl_pwr_constraint constraint;
+	bool superfast;
+	struct work_struct thermal_cycle_ws;
+	struct timer_list thermal_timer;
+	uint32_t thermal_timeout;
+	uint32_t thermal_cycle;
+	uint32_t thermal_highlow;
+	struct list_head limits;
+	spinlock_t limits_lock;
+	struct kgsl_pwr_limit *sysfs_pwr_limit;
+	unsigned int gpu_bimc_int_clk_freq;
+	bool gpu_bimc_interface_enabled;
+	const char *tzone_name;
+};
+
+int kgsl_pwrctrl_init(struct kgsl_device *device);
+void kgsl_pwrctrl_close(struct kgsl_device *device);
+void kgsl_timer(unsigned long data);
+void kgsl_idle_check(struct work_struct *work);
+void kgsl_pre_hwaccess(struct kgsl_device *device);
+void kgsl_pwrctrl_pwrlevel_change(struct kgsl_device *device,
+	unsigned int level);
+void kgsl_pwrctrl_buslevel_update(struct kgsl_device *device,
+	bool on);
+int kgsl_pwrctrl_init_sysfs(struct kgsl_device *device);
+void kgsl_pwrctrl_uninit_sysfs(struct kgsl_device *device);
+int kgsl_pwrctrl_change_state(struct kgsl_device *device, int state);
+
+static inline unsigned long kgsl_get_clkrate(struct clk *clk)
+{
+	return (clk != NULL) ? clk_get_rate(clk) : 0;
+}
+
+/*
+ * kgsl_pwrctrl_active_freq - get currently configured frequency
+ * @pwr: kgsl_pwrctrl structure for the device
+ *
+ * Returns the currently configured frequency for the device.
+ */
+static inline unsigned long
+kgsl_pwrctrl_active_freq(struct kgsl_pwrctrl *pwr)
+{
+	return pwr->pwrlevels[pwr->active_pwrlevel].gpu_freq;
+}
+
+int __must_check kgsl_active_count_get(struct kgsl_device *device);
+void kgsl_active_count_put(struct kgsl_device *device);
+int kgsl_active_count_wait(struct kgsl_device *device, int count);
+void kgsl_pwrctrl_busy_time(struct kgsl_device *device, u64 time, u64 busy);
+void kgsl_pwrctrl_set_constraint(struct kgsl_device *device,
+			struct kgsl_pwr_constraint *pwrc, uint32_t id);
+void kgsl_pwrctrl_update_l2pc(struct kgsl_device *device);
+#endif /* __KGSL_PWRCTRL_H */
diff --git a/drivers/gpu/msm/kgsl_pwrscale.c b/drivers/gpu/msm/kgsl_pwrscale.c
new file mode 100644
index 0000000..77ff91b
--- /dev/null
+++ b/drivers/gpu/msm/kgsl_pwrscale.c
@@ -0,0 +1,986 @@
+/* Copyright (c) 2010-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/export.h>
+#include <linux/kernel.h>
+
+#include "kgsl.h"
+#include "kgsl_pwrscale.h"
+#include "kgsl_device.h"
+#include "kgsl_trace.h"
+
+/*
+ * "SLEEP" is a generic term covering both NAP & SLUMBER.
+ * PERIODS generally won't exceed 9 for the relevant 150msec
+ * window, but can be significantly smaller and still POPP
+ * pushable in cases where SLUMBER is involved.  Hence the
+ * additional reliance on PERCENT to make sure a reasonable
+ * amount of down-time actually exists.
+ */
+#define MIN_SLEEP_PERIODS	3
+#define MIN_SLEEP_PERCENT	5
+
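+/*
+ * Per-POPP-level percentage adjustments applied to the reported GPU busy
+ * time (gpu_x) and DDR time (ddr_y) before they are handed to the governor.
+ */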
+static struct kgsl_popp popp_param[POPP_MAX] = {
+	{0, 0},
+	{-5, 20},
+	{-5, 0},
+	{0, 0},
+};
+
+static void do_devfreq_suspend(struct work_struct *work);
+static void do_devfreq_resume(struct work_struct *work);
+static void do_devfreq_notify(struct work_struct *work);
+
+/*
+ * These variables are used to keep the latest data
+ * returned by kgsl_devfreq_get_dev_status
+ */
+static struct xstats last_xstats;
+static struct devfreq_dev_status last_status = { .private_data = &last_xstats };
+
+/*
+ * kgsl_pwrscale_sleep - notify governor that device is going off
+ * @device: The device
+ *
+ * Called shortly after all pending work is completed.
+ */
+void kgsl_pwrscale_sleep(struct kgsl_device *device)
+{
+	struct kgsl_pwrscale *psc = &device->pwrscale;
+
+	if (!device->pwrscale.enabled)
+		return;
+	device->pwrscale.on_time = 0;
+
+	psc->popp_level = 0;
+	clear_bit(POPP_PUSH, &device->pwrscale.popp_state);
+
+	/* to call devfreq_suspend_device() from a kernel thread */
+	queue_work(device->pwrscale.devfreq_wq,
+		&device->pwrscale.devfreq_suspend_ws);
+}
+EXPORT_SYMBOL(kgsl_pwrscale_sleep);
+
+/*
+ * kgsl_pwrscale_wake - notify governor that device is going on
+ * @device: The device
+ *
+ * Called when the device is returning to an active state.
+ */
+void kgsl_pwrscale_wake(struct kgsl_device *device)
+{
+	struct kgsl_power_stats stats;
+	struct kgsl_pwrscale *psc = &device->pwrscale;
+
+	if (!device->pwrscale.enabled)
+		return;
+	/* clear old stats before waking */
+	memset(&psc->accum_stats, 0, sizeof(psc->accum_stats));
+	memset(&last_xstats, 0, sizeof(last_xstats));
+
+	/* and discard any hw activity from waking up */
+	device->ftbl->power_stats(device, &stats);
+
+	psc->time = ktime_get();
+
+	psc->next_governor_call = ktime_add_us(psc->time,
+			KGSL_GOVERNOR_CALL_INTERVAL);
+
+	/* to call devfreq_resume_device() from a kernel thread */
+	queue_work(psc->devfreq_wq, &psc->devfreq_resume_ws);
+}
+EXPORT_SYMBOL(kgsl_pwrscale_wake);
+
+/*
+ * kgsl_pwrscale_busy - update pwrscale state for new work
+ * @device: The device
+ *
+ * Called when new work is submitted to the device.
+ * This function must be called with the device mutex locked.
+ */
+void kgsl_pwrscale_busy(struct kgsl_device *device)
+{
+	if (!device->pwrscale.enabled)
+		return;
+	if (device->pwrscale.on_time == 0)
+		device->pwrscale.on_time = ktime_to_us(ktime_get());
+}
+EXPORT_SYMBOL(kgsl_pwrscale_busy);
+
+/**
+ * kgsl_pwrscale_update_stats() - update device busy statistics
+ * @device: The device
+ *
+ * Read hardware busy counters and accumulate the results.
+ */
+void kgsl_pwrscale_update_stats(struct kgsl_device *device)
+{
+	struct kgsl_pwrctrl *pwrctrl = &device->pwrctrl;
+	struct kgsl_pwrscale *psc = &device->pwrscale;
+
+	if (WARN_ON(!mutex_is_locked(&device->mutex)))
+		return;
+
+	if (!psc->enabled)
+		return;
+
+	if (device->state == KGSL_STATE_ACTIVE) {
+		struct kgsl_power_stats stats;
+
+		device->ftbl->power_stats(device, &stats);
+		if (psc->popp_level) {
+			u64 x = stats.busy_time;
+			u64 y = stats.ram_time;
+
+			do_div(x, 100);
+			do_div(y, 100);
+			x *= popp_param[psc->popp_level].gpu_x;
+			y *= popp_param[psc->popp_level].ddr_y;
+			trace_kgsl_popp_mod(device, x, y);
+			stats.busy_time += x;
+			stats.ram_time += y;
+		}
+		device->pwrscale.accum_stats.busy_time += stats.busy_time;
+		device->pwrscale.accum_stats.ram_time += stats.ram_time;
+		device->pwrscale.accum_stats.ram_wait += stats.ram_wait;
+		pwrctrl->clock_times[pwrctrl->active_pwrlevel] +=
+				stats.busy_time;
+	}
+}
+EXPORT_SYMBOL(kgsl_pwrscale_update_stats);
+
+/**
+ * kgsl_pwrscale_update() - update device busy statistics
+ * @device: The device
+ *
+ * If enough time has passed schedule the next call to devfreq
+ * get_dev_status.
+ */
+void kgsl_pwrscale_update(struct kgsl_device *device)
+{
+	ktime_t t;
+
+	if (WARN_ON(!mutex_is_locked(&device->mutex)))
+		return;
+
+	if (!device->pwrscale.enabled)
+		return;
+
+	t = ktime_get();
+	if (ktime_compare(t, device->pwrscale.next_governor_call) < 0)
+		return;
+
+	device->pwrscale.next_governor_call = ktime_add_us(t,
+			KGSL_GOVERNOR_CALL_INTERVAL);
+
+	/* to call srcu_notifier_call_chain() from a kernel thread */
+	if (device->state != KGSL_STATE_SLUMBER)
+		queue_work(device->pwrscale.devfreq_wq,
+			&device->pwrscale.devfreq_notify_ws);
+}
+EXPORT_SYMBOL(kgsl_pwrscale_update);
+
+/*
+ * kgsl_pwrscale_disable - temporarily disable the governor
+ * @device: The device
+ * @turbo: Indicates if pwrlevel should be forced to turbo
+ *
+ * Temporarily disable the governor, to prevent interference
+ * with profiling tools that expect a fixed clock frequency.
+ * This function must be called with the device mutex locked.
+ */
+void kgsl_pwrscale_disable(struct kgsl_device *device, bool turbo)
+{
+	if (WARN_ON(!mutex_is_locked(&device->mutex)))
+		return;
+
+	if (device->pwrscale.devfreqptr)
+		queue_work(device->pwrscale.devfreq_wq,
+			&device->pwrscale.devfreq_suspend_ws);
+	device->pwrscale.enabled = false;
+	if (turbo)
+		kgsl_pwrctrl_pwrlevel_change(device, KGSL_PWRLEVEL_TURBO);
+}
+EXPORT_SYMBOL(kgsl_pwrscale_disable);
+
+/*
+ * kgsl_pwrscale_enable - re-enable the governor
+ * @device: The device
+ *
+ * Reenable the governor after a kgsl_pwrscale_disable() call.
+ * This function must be called with the device mutex locked.
+ */
+void kgsl_pwrscale_enable(struct kgsl_device *device)
+{
+	if (WARN_ON(!mutex_is_locked(&device->mutex)))
+		return;
+
+	if (device->pwrscale.devfreqptr) {
+		queue_work(device->pwrscale.devfreq_wq,
+			&device->pwrscale.devfreq_resume_ws);
+		device->pwrscale.enabled = true;
+	} else {
+		/*
+		 * If devfreq is not set, don't enable pwrscale and let the
+		 * device run at the default level.
+		 */
+		kgsl_pwrctrl_pwrlevel_change(device,
+					device->pwrctrl.default_pwrlevel);
+		device->pwrscale.enabled = false;
+	}
+}
+EXPORT_SYMBOL(kgsl_pwrscale_enable);
+
+static int _thermal_adjust(struct kgsl_pwrctrl *pwr, int level)
+{
+	if (level < pwr->active_pwrlevel)
+		return pwr->active_pwrlevel;
+
+	/*
+	 * A lower frequency has been recommended!  Stop thermal
+	 * cycling (but keep the upper thermal limit) and switch to
+	 * the lower frequency.
+	 */
+	pwr->thermal_cycle = CYCLE_ENABLE;
+	del_timer_sync(&pwr->thermal_timer);
+	return level;
+}
+
+/*
+ * Use various metrics including level stability, NAP intervals, and
+ * overall GPU freq / DDR freq combination to decide if POPP should
+ * be activated.
+ */
+static bool popp_stable(struct kgsl_device *device)
+{
+	s64 t;
+	s64 nap_time = 0;
+	s64 go_time = 0;
+	int i, index;
+	int nap = 0;
+	s64 percent_nap = 0;
+	struct kgsl_pwr_event *e;
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct kgsl_pwrscale *psc = &device->pwrscale;
+
+	if (!test_bit(POPP_ON, &psc->popp_state))
+		return false;
+
+	/* If already pushed or running naturally at min don't push further */
+	if (test_bit(POPP_PUSH, &psc->popp_state))
+		return false;
+	if (!psc->popp_level &&
+			(pwr->active_pwrlevel == pwr->min_pwrlevel))
+		return false;
+	if (psc->history[KGSL_PWREVENT_STATE].events == NULL)
+		return false;
+
+	t = ktime_to_ms(ktime_get());
+	/* Check for recent NAP statistics: NAPping regularly and well? */
+	if (pwr->active_pwrlevel == 0) {
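+		/*
+		 * Walk backwards through the circular state-event history,
+		 * starting from the entry just before the current index.
+		 */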
+		index = psc->history[KGSL_PWREVENT_STATE].index;
+		i = index > 0 ? (index - 1) :
+			(psc->history[KGSL_PWREVENT_STATE].size - 1);
+		while (i != index) {
+			e = &psc->history[KGSL_PWREVENT_STATE].events[i];
+			if (e->data == KGSL_STATE_NAP ||
+				e->data == KGSL_STATE_SLUMBER) {
+				if (ktime_to_ms(e->start) + STABLE_TIME > t) {
+					nap++;
+					nap_time += e->duration;
+				}
+			} else if (e->data == KGSL_STATE_ACTIVE) {
+				if (ktime_to_ms(e->start) + STABLE_TIME > t)
+					go_time += e->duration;
+			}
+			if (i == 0)
+				i = psc->history[KGSL_PWREVENT_STATE].size - 1;
+			else
+				i--;
+		}
+		if (nap_time && go_time) {
+			percent_nap = 100 * nap_time;
+			do_div(percent_nap, nap_time + go_time);
+		}
+		trace_kgsl_popp_nap(device, (int)nap_time / 1000, nap,
+				percent_nap);
+		/* If running high at turbo, don't push */
+		if (nap < MIN_SLEEP_PERIODS || percent_nap < MIN_SLEEP_PERCENT)
+			return false;
+	}
+
+	/* Finally check that there hasn't been a recent change */
+	if ((device->pwrscale.freq_change_time + STABLE_TIME) < t) {
+		device->pwrscale.freq_change_time = t;
+		return true;
+	}
+	return false;
+}
+
+bool kgsl_popp_check(struct kgsl_device *device)
+{
+	int i;
+	struct kgsl_pwrscale *psc = &device->pwrscale;
+	struct kgsl_pwr_event *e;
+
+	if (!test_bit(POPP_ON, &psc->popp_state))
+		return false;
+	if (!test_bit(POPP_PUSH, &psc->popp_state))
+		return false;
+	if (psc->history[KGSL_PWREVENT_STATE].events == NULL) {
+		clear_bit(POPP_PUSH, &psc->popp_state);
+		return false;
+	}
+
+	e = &psc->history[KGSL_PWREVENT_STATE].
+			events[psc->history[KGSL_PWREVENT_STATE].index];
+	if (e->data == KGSL_STATE_SLUMBER)
+		e->duration = ktime_us_delta(ktime_get(), e->start);
+
+	/* If there's been a long SLUMBER in recent history, clear the _PUSH */
+	for (i = 0; i < psc->history[KGSL_PWREVENT_STATE].size; i++) {
+		e = &psc->history[KGSL_PWREVENT_STATE].events[i];
+		if ((e->data == KGSL_STATE_SLUMBER) &&
+			 (e->duration > POPP_RESET_TIME)) {
+			clear_bit(POPP_PUSH, &psc->popp_state);
+			return false;
+		}
+	}
+	return true;
+}
+
+/*
+ * The GPU has been running at the current frequency for a while.  Attempt
+ * to lower the frequency for borderline cases.
+ */
+static void popp_trans1(struct kgsl_device *device)
+{
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct kgsl_pwrlevel *pl = &pwr->pwrlevels[pwr->active_pwrlevel];
+	struct kgsl_pwrscale *psc = &device->pwrscale;
+	int old_level = psc->popp_level;
+
+	switch (old_level) {
+	case 0:
+		psc->popp_level = 2;
+		/* If the current level has a high default bus don't push it */
+		if (pl->bus_freq == pl->bus_max)
+			pwr->bus_mod = 1;
+		kgsl_pwrctrl_pwrlevel_change(device, pwr->active_pwrlevel + 1);
+		break;
+	case 1:
+	case 2:
+		psc->popp_level++;
+		break;
+	case 3:
+		set_bit(POPP_PUSH, &psc->popp_state);
+		psc->popp_level = 0;
+		break;
+	case POPP_MAX:
+	default:
+		psc->popp_level = 0;
+		break;
+	}
+
+	trace_kgsl_popp_level(device, old_level, psc->popp_level);
+}
+
+/*
+ * The GPU DCVS algorithm recommends a level change.  Apply any
+ * POPP restrictions and update the level accordingly
+ */
+static int popp_trans2(struct kgsl_device *device, int level)
+{
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	struct kgsl_pwrscale *psc = &device->pwrscale;
+	int old_level = psc->popp_level;
+
+	if (!test_bit(POPP_ON, &psc->popp_state))
+		return level;
+
+	clear_bit(POPP_PUSH, &psc->popp_state);
+	/* If the governor recommends going down, do it! */
+	if (pwr->active_pwrlevel < level) {
+		psc->popp_level = 0;
+		trace_kgsl_popp_level(device, old_level, psc->popp_level);
+		return level;
+	}
+
+	switch (psc->popp_level) {
+	case 0:
+		/* If the feature isn't engaged, go up immediately */
+		break;
+	case 1:
+		/* Turn off mitigation, and go up a level */
+		psc->popp_level = 0;
+		break;
+	case 2:
+	case 3:
+		/* Try a more aggressive mitigation */
+		psc->popp_level--;
+		level++;
+		/* Update the stable timestamp */
+		device->pwrscale.freq_change_time = ktime_to_ms(ktime_get());
+		break;
+	case POPP_MAX:
+	default:
+		psc->popp_level = 0;
+		break;
+	}
+
+	trace_kgsl_popp_level(device, old_level, psc->popp_level);
+
+	return level;
+}
+
+#ifdef DEVFREQ_FLAG_WAKEUP_MAXFREQ
+static inline bool _check_maxfreq(u32 flags)
+{
+	return (flags & DEVFREQ_FLAG_WAKEUP_MAXFREQ);
+}
+#else
+static inline bool _check_maxfreq(u32 flags)
+{
+	return false;
+}
+#endif
+
+/*
+ * kgsl_devfreq_target - devfreq_dev_profile.target callback
+ * @dev: see devfreq.h
+ * @freq: see devfreq.h
+ * @flags: see devfreq.h
+ *
+ * This function expects the device mutex to be unlocked.
+ */
+int kgsl_devfreq_target(struct device *dev, unsigned long *freq, u32 flags)
+{
+	struct kgsl_device *device = dev_get_drvdata(dev);
+	struct kgsl_pwrctrl *pwr;
+	struct kgsl_pwrlevel *pwr_level;
+	int level, i;
+	unsigned long cur_freq;
+
+	if (device == NULL)
+		return -ENODEV;
+	if (freq == NULL)
+		return -EINVAL;
+	if (!device->pwrscale.enabled)
+		return 0;
+
+	pwr = &device->pwrctrl;
+	if (_check_maxfreq(flags)) {
+		/*
+		 * The GPU is about to get suspended,
+		 * but it needs to be at the max power level when waking up
+		 */
+		pwr->wakeup_maxpwrlevel = 1;
+		return 0;
+	}
+
+	mutex_lock(&device->mutex);
+	cur_freq = kgsl_pwrctrl_active_freq(pwr);
+	level = pwr->active_pwrlevel;
+	pwr_level = &pwr->pwrlevels[level];
+
+	/* If the governor recommends a new frequency, update it here */
+	if (*freq != cur_freq) {
+		level = pwr->max_pwrlevel;
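+		/*
+		 * Scan from the lowest to the highest frequency and pick the
+		 * first level that can satisfy the requested frequency.
+		 */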
+		for (i = pwr->min_pwrlevel; i >= pwr->max_pwrlevel; i--)
+			if (*freq <= pwr->pwrlevels[i].gpu_freq) {
+				if (pwr->thermal_cycle == CYCLE_ACTIVE)
+					level = _thermal_adjust(pwr, i);
+				else
+					level = popp_trans2(device, i);
+				break;
+			}
+		if (level != pwr->active_pwrlevel)
+			kgsl_pwrctrl_pwrlevel_change(device, level);
+	} else if (popp_stable(device)) {
+		popp_trans1(device);
+	}
+
+	*freq = kgsl_pwrctrl_active_freq(pwr);
+
+	mutex_unlock(&device->mutex);
+	return 0;
+}
+EXPORT_SYMBOL(kgsl_devfreq_target);
+
+/*
+ * kgsl_devfreq_get_dev_status - devfreq_dev_profile.get_dev_status callback
+ * @dev: see devfreq.h
+ * @stat: see devfreq.h
+ *
+ * This function expects the device mutex to be unlocked.
+ */
+int kgsl_devfreq_get_dev_status(struct device *dev,
+				struct devfreq_dev_status *stat)
+{
+	struct kgsl_device *device = dev_get_drvdata(dev);
+	struct kgsl_pwrctrl *pwrctrl;
+	struct kgsl_pwrscale *pwrscale;
+	ktime_t tmp;
+
+	if (device == NULL)
+		return -ENODEV;
+	if (stat == NULL)
+		return -EINVAL;
+
+	pwrscale = &device->pwrscale;
+	pwrctrl = &device->pwrctrl;
+
+	mutex_lock(&device->mutex);
+	/*
+	 * If the GPU clock is on grab the latest power counter
+	 * values.  Otherwise the most recent ACTIVE values will
+	 * already be stored in accum_stats.
+	 */
+	kgsl_pwrscale_update_stats(device);
+
+	tmp = ktime_get();
+	stat->total_time = ktime_us_delta(tmp, pwrscale->time);
+	pwrscale->time = tmp;
+
+	stat->busy_time = pwrscale->accum_stats.busy_time;
+
+	stat->current_frequency = kgsl_pwrctrl_active_freq(&device->pwrctrl);
+
+	stat->private_data = &device->active_context_count;
+
+	/*
+	 * Keep the latest devfreq_dev_status values and vbif counter
+	 * data so they can be (re)used by kgsl_busmon_get_dev_status().
+	 */
+	if (pwrctrl->bus_control) {
+		struct xstats *last_b =
+			(struct xstats *)last_status.private_data;
+
+		last_status.total_time = stat->total_time;
+		last_status.busy_time = stat->busy_time;
+		last_status.current_frequency = stat->current_frequency;
+
+		last_b->ram_time = device->pwrscale.accum_stats.ram_time;
+		last_b->ram_wait = device->pwrscale.accum_stats.ram_wait;
+		last_b->mod = device->pwrctrl.bus_mod;
+	}
+
+	kgsl_pwrctrl_busy_time(device, stat->total_time, stat->busy_time);
+	trace_kgsl_pwrstats(device, stat->total_time,
+		&pwrscale->accum_stats, device->active_context_count);
+	memset(&pwrscale->accum_stats, 0, sizeof(pwrscale->accum_stats));
+
+	mutex_unlock(&device->mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL(kgsl_devfreq_get_dev_status);
+
+/*
+ * kgsl_devfreq_get_cur_freq - devfreq_dev_profile.get_cur_freq callback
+ * @dev: see devfreq.h
+ * @freq: see devfreq.h
+ *
+ * This function expects the device mutex to be unlocked.
+ */
+int kgsl_devfreq_get_cur_freq(struct device *dev, unsigned long *freq)
+{
+	struct kgsl_device *device = dev_get_drvdata(dev);
+
+	if (device == NULL)
+		return -ENODEV;
+	if (freq == NULL)
+		return -EINVAL;
+
+	mutex_lock(&device->mutex);
+	*freq = kgsl_pwrctrl_active_freq(&device->pwrctrl);
+	mutex_unlock(&device->mutex);
+
+	return 0;
+}
+EXPORT_SYMBOL(kgsl_devfreq_get_cur_freq);
+
+/*
+ * kgsl_devfreq_add_notifier - add a fine grained notifier.
+ * @dev: The device
+ * @nb: Notifier block that will receive updates.
+ *
+ * Add a notifier to receive ADRENO_DEVFREQ_NOTIFY_* events
+ * from the device.
+ */
+int kgsl_devfreq_add_notifier(struct device *dev,
+		struct notifier_block *nb)
+{
+	struct kgsl_device *device = dev_get_drvdata(dev);
+
+	if (device == NULL)
+		return -ENODEV;
+
+	if (nb == NULL)
+		return -EINVAL;
+
+	return srcu_notifier_chain_register(&device->pwrscale.nh, nb);
+}
+EXPORT_SYMBOL(kgsl_devfreq_add_notifier);
+
+/*
+ * kgsl_devfreq_del_notifier - remove a fine grained notifier.
+ * @dev: The device
+ * @nb: The notifier block.
+ *
+ * Remove a notifier registered with kgsl_devfreq_add_notifier().
+ */
+int kgsl_devfreq_del_notifier(struct device *dev, struct notifier_block *nb)
+{
+	struct kgsl_device *device = dev_get_drvdata(dev);
+
+	if (device == NULL)
+		return -ENODEV;
+
+	if (nb == NULL)
+		return -EINVAL;
+
+	return srcu_notifier_chain_unregister(&device->pwrscale.nh, nb);
+}
+EXPORT_SYMBOL(kgsl_devfreq_del_notifier);
+
+
+/*
+ * kgsl_busmon_get_dev_status - devfreq_dev_profile.get_dev_status callback
+ * @dev: see devfreq.h
+ * @stat: see devfreq.h
+ *
+ * This function expects the device mutex to be unlocked.
+ */
+int kgsl_busmon_get_dev_status(struct device *dev,
+			struct devfreq_dev_status *stat)
+{
+	struct xstats *b;
+
+	stat->total_time = last_status.total_time;
+	stat->busy_time = last_status.busy_time;
+	stat->current_frequency = last_status.current_frequency;
+	if (stat->private_data) {
+		struct xstats *last_b =
+			(struct xstats *)last_status.private_data;
+		b = (struct xstats *)stat->private_data;
+		b->ram_time = last_b->ram_time;
+		b->ram_wait = last_b->ram_wait;
+		b->mod = last_b->mod;
+	}
+	return 0;
+}
+
+#ifdef DEVFREQ_FLAG_FAST_HINT
+static inline bool _check_fast_hint(u32 flags)
+{
+	return (flags & DEVFREQ_FLAG_FAST_HINT);
+}
+#else
+static inline bool _check_fast_hint(u32 flags)
+{
+	return false;
+}
+#endif
+
+#ifdef DEVFREQ_FLAG_SLOW_HINT
+static inline bool _check_slow_hint(u32 flags)
+{
+	return (flags & DEVFREQ_FLAG_SLOW_HINT);
+}
+#else
+static inline bool _check_slow_hint(u32 flags)
+{
+	return false;
+}
+#endif
+
+/*
+ * kgsl_busmon_target - devfreq_dev_profile.target callback
+ * @dev: see devfreq.h
+ * @freq: see devfreq.h
+ * @flags: see devfreq.h
+ *
+ * This function expects the device mutex to be unlocked.
+ */
+int kgsl_busmon_target(struct device *dev, unsigned long *freq, u32 flags)
+{
+	struct kgsl_device *device = dev_get_drvdata(dev);
+	struct kgsl_pwrctrl *pwr;
+	struct kgsl_pwrlevel *pwr_level;
+	int  level, b;
+	u32 bus_flag;
+	unsigned long ab_mbytes;
+
+	if (device == NULL)
+		return -ENODEV;
+	if (freq == NULL)
+		return -EINVAL;
+	if (!device->pwrscale.enabled)
+		return 0;
+
+	pwr = &device->pwrctrl;
+
+	if (!pwr->bus_control)
+		return 0;
+
+	mutex_lock(&device->mutex);
+	level = pwr->active_pwrlevel;
+	pwr_level = &pwr->pwrlevels[level];
+	bus_flag = device->pwrscale.bus_profile.flag;
+	device->pwrscale.bus_profile.flag = 0;
+	ab_mbytes = device->pwrscale.bus_profile.ab_mbytes;
+
+	/*
+	 * The bus devfreq governor calculated its recommendations
+	 * while the gpu was running at *freq.  If the gpu frequency
+	 * is different now, it is better to ignore the call.
+	 */
+	if (pwr_level->gpu_freq != *freq) {
+		mutex_unlock(&device->mutex);
+		return 0;
+	}
+
+	b = pwr->bus_mod;
+	if (_check_fast_hint(bus_flag) &&
+		((pwr_level->bus_freq + pwr->bus_mod) < pwr_level->bus_max))
+		pwr->bus_mod++;
+	else if (_check_slow_hint(bus_flag) &&
+		((pwr_level->bus_freq + pwr->bus_mod) > pwr_level->bus_min))
+		pwr->bus_mod--;
+
+	/* Update bus vote if AB or IB is modified */
+	if ((pwr->bus_mod != b) || (pwr->bus_ab_mbytes != ab_mbytes)) {
+		pwr->bus_percent_ab = device->pwrscale.bus_profile.percent_ab;
+		pwr->bus_ab_mbytes = ab_mbytes;
+		kgsl_pwrctrl_buslevel_update(device, true);
+	}
+
+	mutex_unlock(&device->mutex);
+	return 0;
+}
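The fast/slow hint handling above nudges pwr->bus_mod one step at a time while keeping the combined bus vote inside the level's [bus_min, bus_max] window. A minimal sketch of that clamping rule, using local, hypothetical parameter names purely for illustration:

/* Illustrative sketch only: mirrors the hint clamping in kgsl_busmon_target(). */
static int example_adjust_bus_mod(int bus_mod, int bus_freq,
				  int bus_min, int bus_max,
				  bool fast_hint, bool slow_hint)
{
	if (fast_hint && (bus_freq + bus_mod) < bus_max)
		bus_mod++;		/* vote one bus step higher */
	else if (slow_hint && (bus_freq + bus_mod) > bus_min)
		bus_mod--;		/* vote one bus step lower */

	return bus_mod;
}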
+
+int kgsl_busmon_get_cur_freq(struct device *dev, unsigned long *freq)
+{
+	return 0;
+}
+
+
+/*
+ * kgsl_pwrscale_init - Initialize pwrscale.
+ * @dev: The device
+ * @governor: The initial governor to use.
+ *
+ * Initialize devfreq and any non-constant profile data.
+ */
+int kgsl_pwrscale_init(struct device *dev, const char *governor)
+{
+	struct kgsl_device *device;
+	struct kgsl_pwrscale *pwrscale;
+	struct kgsl_pwrctrl *pwr;
+	struct devfreq *devfreq;
+	struct devfreq *bus_devfreq;
+	struct msm_adreno_extended_profile *gpu_profile;
+	struct devfreq_dev_profile *profile;
+	struct devfreq_msm_adreno_tz_data *data;
+	int i, out = 0;
+	int ret;
+
+	device = dev_get_drvdata(dev);
+	if (device == NULL)
+		return -ENODEV;
+
+	pwrscale = &device->pwrscale;
+	pwr = &device->pwrctrl;
+	gpu_profile = &pwrscale->gpu_profile;
+	profile = &pwrscale->gpu_profile.profile;
+
+	srcu_init_notifier_head(&pwrscale->nh);
+
+	profile->initial_freq =
+		pwr->pwrlevels[pwr->default_pwrlevel].gpu_freq;
+	/* Let's start with 10 ms and tune in later */
+	profile->polling_ms = 10;
+
+	/* do not include the 'off' level or duplicate freq. levels */
+	for (i = 0; i < (pwr->num_pwrlevels - 1); i++)
+		pwrscale->freq_table[out++] = pwr->pwrlevels[i].gpu_freq;
+
+	/*
+	 * max_state is the number of valid power levels.
+	 * The valid power levels range from 0 to (max_state - 1).
+	 */
+	profile->max_state = pwr->num_pwrlevels - 1;
+	/* link storage array to the devfreq profile pointer */
+	profile->freq_table = pwrscale->freq_table;
+
+	/* if there is only 1 freq, no point in running a governor */
+	if (profile->max_state == 1)
+		governor = "performance";
+
+	/* initialize msm-adreno-tz governor specific data here */
+	data = gpu_profile->private_data;
+
+	data->disable_busy_time_burst = of_property_read_bool(
+		device->pdev->dev.of_node, "qcom,disable-busy-time-burst");
+
+	data->ctxt_aware_enable =
+		of_property_read_bool(device->pdev->dev.of_node,
+			"qcom,enable-ca-jump");
+
+	if (data->ctxt_aware_enable) {
+		if (of_property_read_u32(device->pdev->dev.of_node,
+				"qcom,ca-target-pwrlevel",
+				&data->bin.ctxt_aware_target_pwrlevel))
+			data->bin.ctxt_aware_target_pwrlevel = 1;
+
+		if ((data->bin.ctxt_aware_target_pwrlevel < 0) ||
+			(data->bin.ctxt_aware_target_pwrlevel >
+						pwr->num_pwrlevels))
+			data->bin.ctxt_aware_target_pwrlevel = 1;
+
+		if (of_property_read_u32(device->pdev->dev.of_node,
+				"qcom,ca-busy-penalty",
+				&data->bin.ctxt_aware_busy_penalty))
+			data->bin.ctxt_aware_busy_penalty = 12000;
+	}
+
+	/*
+	 * If there is a separate GX power rail, allow
+	 * independent modification to its voltage through
+	 * the bus bandwidth vote.
+	 */
+	if (pwr->bus_control) {
+		out = 0;
+		while (pwr->bus_ib[out] && out <= pwr->pwrlevels[0].bus_max) {
+			pwr->bus_ib[out] =
+				pwr->bus_ib[out] >> 20;
+			out++;
+		}
+		data->bus.num = out;
+		data->bus.ib = &pwr->bus_ib[0];
+		data->bus.index = &pwr->bus_index[0];
+		data->bus.width = pwr->bus_width;
+	} else
+		data->bus.num = 0;
+
+	devfreq = devfreq_add_device(dev, &pwrscale->gpu_profile.profile,
+			governor, pwrscale->gpu_profile.private_data);
+	if (IS_ERR(devfreq)) {
+		device->pwrscale.enabled = false;
+		return PTR_ERR(devfreq);
+	}
+
+	pwrscale->devfreqptr = devfreq;
+
+	pwrscale->gpu_profile.bus_devfreq = NULL;
+	if (data->bus.num) {
+		pwrscale->bus_profile.profile.max_state
+					= pwr->num_pwrlevels - 1;
+		pwrscale->bus_profile.profile.freq_table
+					= pwrscale->freq_table;
+
+		bus_devfreq = devfreq_add_device(device->busmondev,
+			&pwrscale->bus_profile.profile, "gpubw_mon", NULL);
+		if (!IS_ERR(bus_devfreq))
+			pwrscale->gpu_profile.bus_devfreq = bus_devfreq;
+	}
+
+	ret = sysfs_create_link(&device->dev->kobj,
+			&devfreq->dev.kobj, "devfreq");
+
+	pwrscale->devfreq_wq = create_freezable_workqueue("kgsl_devfreq_wq");
+	INIT_WORK(&pwrscale->devfreq_suspend_ws, do_devfreq_suspend);
+	INIT_WORK(&pwrscale->devfreq_resume_ws, do_devfreq_resume);
+	INIT_WORK(&pwrscale->devfreq_notify_ws, do_devfreq_notify);
+
+	pwrscale->next_governor_call = ktime_add_us(ktime_get(),
+			KGSL_GOVERNOR_CALL_INTERVAL);
+
+	/* history tracking */
+	for (i = 0; i < KGSL_PWREVENT_MAX; i++) {
+		pwrscale->history[i].events = kzalloc(
+				pwrscale->history[i].size *
+				sizeof(struct kgsl_pwr_event), GFP_KERNEL);
+		pwrscale->history[i].type = i;
+	}
+
+	/* Add links to the devfreq sysfs nodes */
+	kgsl_gpu_sysfs_add_link(device->gpu_sysfs_kobj,
+			 &pwrscale->devfreqptr->dev.kobj, "governor",
+			"gpu_governor");
+	kgsl_gpu_sysfs_add_link(device->gpu_sysfs_kobj,
+			 &pwrscale->devfreqptr->dev.kobj,
+			"available_governors", "gpu_available_governor");
+
+	return 0;
+}
+EXPORT_SYMBOL(kgsl_pwrscale_init);
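kgsl_pwrscale_init() reads several optional devicetree properties and falls back to fixed defaults when they are absent (target power level 1, busy penalty 12000). A hypothetical helper capturing that read-with-fallback pattern, offered only as an illustration and not part of the patch:

#include <linux/of.h>

/* Illustrative only: read an optional u32 property, else return a default. */
static u32 example_of_read_u32_default(struct device_node *np,
				       const char *prop, u32 def)
{
	u32 val;

	if (of_property_read_u32(np, prop, &val))
		return def;

	return val;
}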
+
+/*
+ * kgsl_pwrscale_close - clean up pwrscale
+ * @device: the device
+ *
+ * This function should be called with the device mutex locked.
+ */
+void kgsl_pwrscale_close(struct kgsl_device *device)
+{
+	int i;
+	struct kgsl_pwrscale *pwrscale;
+
+	pwrscale = &device->pwrscale;
+	if (!pwrscale->devfreqptr)
+		return;
+	flush_workqueue(pwrscale->devfreq_wq);
+	destroy_workqueue(pwrscale->devfreq_wq);
+	devfreq_remove_device(device->pwrscale.devfreqptr);
+	device->pwrscale.devfreqptr = NULL;
+	srcu_cleanup_notifier_head(&device->pwrscale.nh);
+	for (i = 0; i < KGSL_PWREVENT_MAX; i++)
+		kfree(pwrscale->history[i].events);
+}
+EXPORT_SYMBOL(kgsl_pwrscale_close);
+
+static void do_devfreq_suspend(struct work_struct *work)
+{
+	struct kgsl_pwrscale *pwrscale = container_of(work,
+			struct kgsl_pwrscale, devfreq_suspend_ws);
+	struct devfreq *devfreq = pwrscale->devfreqptr;
+
+	devfreq_suspend_device(devfreq);
+}
+
+static void do_devfreq_resume(struct work_struct *work)
+{
+	struct kgsl_pwrscale *pwrscale = container_of(work,
+			struct kgsl_pwrscale, devfreq_resume_ws);
+	struct devfreq *devfreq = pwrscale->devfreqptr;
+
+	devfreq_resume_device(devfreq);
+}
+
+static void do_devfreq_notify(struct work_struct *work)
+{
+	struct kgsl_pwrscale *pwrscale = container_of(work,
+			struct kgsl_pwrscale, devfreq_notify_ws);
+	struct devfreq *devfreq = pwrscale->devfreqptr;
+
+	srcu_notifier_call_chain(&pwrscale->nh,
+				 ADRENO_DEVFREQ_NOTIFY_RETIRE,
+				 devfreq);
+}
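do_devfreq_notify() fires the SRCU notifier chain with ADRENO_DEVFREQ_NOTIFY_RETIRE, which is how interested governors learn that GPU work has retired. A hedged sketch of how a client could hook that chain through kgsl_devfreq_add_notifier(); the callback and variable names here are hypothetical:

#include <linux/notifier.h>

/* Illustrative only: react to retire notifications from the GPU. */
static int example_retire_notify(struct notifier_block *nb,
				 unsigned long event, void *data)
{
	if (event == ADRENO_DEVFREQ_NOTIFY_RETIRE) {
		/* 'data' is the struct devfreq that owns the notification */
	}

	return NOTIFY_OK;
}

static struct notifier_block example_nb = {
	.notifier_call = example_retire_notify,
};

/* ret = kgsl_devfreq_add_notifier(&pdev->dev, &example_nb); */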
diff --git a/drivers/gpu/msm/kgsl_pwrscale.h b/drivers/gpu/msm/kgsl_pwrscale.h
new file mode 100644
index 0000000..9a6ccac
--- /dev/null
+++ b/drivers/gpu/msm/kgsl_pwrscale.h
@@ -0,0 +1,162 @@
+/* Copyright (c) 2010-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __KGSL_PWRSCALE_H
+#define __KGSL_PWRSCALE_H
+
+#include <linux/devfreq.h>
+#include <linux/msm_adreno_devfreq.h>
+#include "kgsl_pwrctrl.h"
+
+/* devfreq governor call window in usec */
+#define KGSL_GOVERNOR_CALL_INTERVAL 10000
+
+/* Power events to be tracked with history */
+#define KGSL_PWREVENT_STATE	0
+#define KGSL_PWREVENT_GPU_FREQ	1
+#define KGSL_PWREVENT_BUS_FREQ	2
+#define KGSL_PWREVENT_POPP	3
+#define KGSL_PWREVENT_MAX	4
+
+/**
+ * Amount of time (in msec) running at a level before it is
+ * considered "stable"
+ */
+#define STABLE_TIME	150
+
+/* Amount of idle time needed to re-set stability in usec */
+#define POPP_RESET_TIME	1000000
+
+/* Number of POPP levels */
+#define POPP_MAX	4
+
+/* POPP state bits */
+#define POPP_ON		BIT(0)
+#define POPP_PUSH	BIT(1)
+
+struct kgsl_popp {
+	int gpu_x;
+	int ddr_y;
+};
+
+struct kgsl_power_stats {
+	u64 busy_time;
+	u64 ram_time;
+	u64 ram_wait;
+};
+
+struct kgsl_pwr_event {
+	unsigned int data;
+	ktime_t start;
+	s64 duration;
+};
+
+struct kgsl_pwr_history {
+	struct kgsl_pwr_event *events;
+	unsigned int type;
+	unsigned int index;
+	unsigned int size;
+};
+
+/**
+ * struct kgsl_pwrscale - Power scaling settings for a KGSL device
+ * @devfreqptr - Pointer to the devfreq device
+ * @gpu_profile - GPU profile data for the devfreq device
+ * @bus_profile - Bus specific data for the bus devfreq device
+ * @freq_table - GPU frequencies for the DCVS algorithm
+ * @last_governor - Prior devfreq governor
+ * @accum_stats - Accumulated statistics for various frequency calculations
+ * @enabled - Whether or not power scaling is enabled
+ * @time - Last submitted sample timestamp
+ * @on_time - Timestamp when gpu busy begins
+ * @freq_change_time - Timestamp of last freq change or popp update
+ * @nh - Notifier for the partner devfreq bus device
+ * @devfreq_wq - Main devfreq workqueue
+ * @devfreq_suspend_ws - Pass device suspension to devfreq
+ * @devfreq_resume_ws - Pass device resume to devfreq
+ * @devfreq_notify_ws - Notify devfreq to update sampling
+ * @next_governor_call - Timestamp after which the governor may be notified of
+ * a new sample
+ * @history - History of power events with timestamps and durations
+ * @popp_level - Current level of POPP mitigation
+ * @popp_state - Control state for POPP, on/off, recently pushed, etc
+ */
+struct kgsl_pwrscale {
+	struct devfreq *devfreqptr;
+	struct msm_adreno_extended_profile gpu_profile;
+	struct msm_busmon_extended_profile bus_profile;
+	unsigned long freq_table[KGSL_MAX_PWRLEVELS];
+	char last_governor[DEVFREQ_NAME_LEN];
+	struct kgsl_power_stats accum_stats;
+	bool enabled;
+	ktime_t time;
+	s64 on_time;
+	s64 freq_change_time;
+	struct srcu_notifier_head nh;
+	struct workqueue_struct *devfreq_wq;
+	struct work_struct devfreq_suspend_ws;
+	struct work_struct devfreq_resume_ws;
+	struct work_struct devfreq_notify_ws;
+	ktime_t next_governor_call;
+	struct kgsl_pwr_history history[KGSL_PWREVENT_MAX];
+	int popp_level;
+	unsigned long popp_state;
+};
+
+int kgsl_pwrscale_init(struct device *dev, const char *governor);
+void kgsl_pwrscale_close(struct kgsl_device *device);
+
+void kgsl_pwrscale_update(struct kgsl_device *device);
+void kgsl_pwrscale_update_stats(struct kgsl_device *device);
+void kgsl_pwrscale_busy(struct kgsl_device *device);
+void kgsl_pwrscale_sleep(struct kgsl_device *device);
+void kgsl_pwrscale_wake(struct kgsl_device *device);
+
+void kgsl_pwrscale_enable(struct kgsl_device *device);
+void kgsl_pwrscale_disable(struct kgsl_device *device, bool turbo);
+
+int kgsl_devfreq_target(struct device *dev, unsigned long *freq, u32 flags);
+int kgsl_devfreq_get_dev_status(struct device *dev,
+			struct devfreq_dev_status *stat);
+int kgsl_devfreq_get_cur_freq(struct device *dev, unsigned long *freq);
+
+int kgsl_busmon_target(struct device *dev, unsigned long *freq, u32 flags);
+int kgsl_busmon_get_dev_status(struct device *dev,
+			struct devfreq_dev_status *stat);
+int kgsl_busmon_get_cur_freq(struct device *dev, unsigned long *freq);
+
+bool kgsl_popp_check(struct kgsl_device *device);
+
+
+#define KGSL_PWRSCALE_INIT(_priv_data) { \
+	.enabled = true, \
+	.gpu_profile = { \
+		.private_data = _priv_data, \
+		.profile = { \
+			.target = kgsl_devfreq_target, \
+			.get_dev_status = kgsl_devfreq_get_dev_status, \
+			.get_cur_freq = kgsl_devfreq_get_cur_freq, \
+	} }, \
+	.bus_profile = { \
+		.private_data = _priv_data, \
+		.profile = { \
+			.target = kgsl_busmon_target, \
+			.get_dev_status = kgsl_busmon_get_dev_status, \
+			.get_cur_freq = kgsl_busmon_get_cur_freq, \
+	} }, \
+	.history[KGSL_PWREVENT_STATE].size = 20, \
+	.history[KGSL_PWREVENT_GPU_FREQ].size = 3, \
+	.history[KGSL_PWREVENT_BUS_FREQ].size = 5, \
+	.history[KGSL_PWREVENT_POPP].size = 5, \
+	}
+#endif
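KGSL_PWRSCALE_INIT() is a designated-initializer macro that wires the devfreq callbacks and per-event history sizes in one place. A hedged usage sketch; the variable names are hypothetical and the real use sits inside the adreno device definition elsewhere in the driver:

static struct devfreq_msm_adreno_tz_data example_tz_data;

static struct kgsl_pwrscale example_pwrscale =
	KGSL_PWRSCALE_INIT(&example_tz_data);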
diff --git a/drivers/gpu/msm/kgsl_sharedmem.c b/drivers/gpu/msm/kgsl_sharedmem.c
new file mode 100644
index 0000000..01aac1e
--- /dev/null
+++ b/drivers/gpu/msm/kgsl_sharedmem.c
@@ -0,0 +1,1175 @@
+/* Copyright (c) 2002,2007-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/export.h>
+#include <linux/vmalloc.h>
+#include <asm/cacheflush.h>
+#include <linux/slab.h>
+#include <linux/kmemleak.h>
+#include <linux/highmem.h>
+#include <linux/scatterlist.h>
+#include <soc/qcom/scm.h>
+#include <soc/qcom/secure_buffer.h>
+
+#include "kgsl.h"
+#include "kgsl_sharedmem.h"
+#include "kgsl_device.h"
+#include "kgsl_log.h"
+#include "kgsl_mmu.h"
+#include "kgsl_pool.h"
+
+/*
+ * The user can set this from debugfs to force failed memory allocations to
+ * fail without trying OOM first.  This is a debug setting useful for
+ * stress applications that want to test failure cases without pushing the
+ * system into unrecoverable OOM panics
+ */
+
+static bool sharedmem_noretry_flag;
+
+static DEFINE_MUTEX(kernel_map_global_lock);
+
+struct cp2_mem_chunks {
+	unsigned int chunk_list;
+	unsigned int chunk_list_size;
+	unsigned int chunk_size;
+} __attribute__ ((__packed__));
+
+struct cp2_lock_req {
+	struct cp2_mem_chunks chunks;
+	unsigned int mem_usage;
+	unsigned int lock;
+} __attribute__ ((__packed__));
+
+#define MEM_PROTECT_LOCK_ID2		0x0A
+#define MEM_PROTECT_LOCK_ID2_FLAT	0x11
+
+/* An attribute for showing per-process memory statistics */
+struct kgsl_mem_entry_attribute {
+	struct attribute attr;
+	int memtype;
+	ssize_t (*show)(struct kgsl_process_private *priv,
+		int type, char *buf);
+};
+
+#define to_mem_entry_attr(a) \
+container_of(a, struct kgsl_mem_entry_attribute, attr)
+
+#define __MEM_ENTRY_ATTR(_type, _name, _show) \
+{ \
+	.attr = { .name = __stringify(_name), .mode = 0444 }, \
+	.memtype = _type, \
+	.show = _show, \
+}
+
+/*
+ * A structure to hold the attributes for a particular memory type.
+ * For each memory type in each process we store the current and maximum
+ * memory usage and display the counts in sysfs.  This structure and
+ * the following macro allow us to simplify the definition for those
+ * adding new memory types
+ */
+
+struct mem_entry_stats {
+	int memtype;
+	struct kgsl_mem_entry_attribute attr;
+	struct kgsl_mem_entry_attribute max_attr;
+};
+
+
+#define MEM_ENTRY_STAT(_type, _name) \
+{ \
+	.memtype = _type, \
+	.attr = __MEM_ENTRY_ATTR(_type, _name, mem_entry_show), \
+	.max_attr = __MEM_ENTRY_ATTR(_type, _name##_max, \
+		mem_entry_max_show), \
+}
+
+static void kgsl_cma_unlock_secure(struct kgsl_memdesc *memdesc);
+
+/**
+ * Show the current amount of memory allocated for the given memtype
+ */
+
+static ssize_t
+mem_entry_show(struct kgsl_process_private *priv, int type, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%llu\n", priv->stats[type].cur);
+}
+
+/**
+ * Show the maximum memory allocated for the given memtype through the life of
+ * the process
+ */
+
+static ssize_t
+mem_entry_max_show(struct kgsl_process_private *priv, int type, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%llu\n", priv->stats[type].max);
+}
+
+static ssize_t mem_entry_sysfs_show(struct kobject *kobj,
+	struct attribute *attr, char *buf)
+{
+	struct kgsl_mem_entry_attribute *pattr = to_mem_entry_attr(attr);
+	struct kgsl_process_private *priv;
+	ssize_t ret;
+
+	/*
+	 * 1. sysfs_remove_file waits for reads to complete before the node
+	 *    is deleted.
+	 * 2. kgsl_process_init_sysfs takes a refcount to the process_private,
+	 *    which is put at the end of kgsl_process_uninit_sysfs.
+	 * These two conditions imply that priv will not be freed until this
+	 * function completes, and no further locking is needed.
+	 */
+	priv = kobj ? container_of(kobj, struct kgsl_process_private, kobj) :
+			NULL;
+
+	if (priv && pattr->show)
+		ret = pattr->show(priv, pattr->memtype, buf);
+	else
+		ret = -EIO;
+
+	return ret;
+}
+
+static const struct sysfs_ops mem_entry_sysfs_ops = {
+	.show = mem_entry_sysfs_show,
+};
+
+static struct kobj_type ktype_mem_entry = {
+	.sysfs_ops = &mem_entry_sysfs_ops,
+};
+
+static struct mem_entry_stats mem_stats[] = {
+	MEM_ENTRY_STAT(KGSL_MEM_ENTRY_KERNEL, kernel),
+	MEM_ENTRY_STAT(KGSL_MEM_ENTRY_USER, user),
+#ifdef CONFIG_ION
+	MEM_ENTRY_STAT(KGSL_MEM_ENTRY_ION, ion),
+#endif
+};
+
+void
+kgsl_process_uninit_sysfs(struct kgsl_process_private *private)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(mem_stats); i++) {
+		sysfs_remove_file(&private->kobj, &mem_stats[i].attr.attr);
+		sysfs_remove_file(&private->kobj,
+			&mem_stats[i].max_attr.attr);
+	}
+
+	kobject_put(&private->kobj);
+	/* Put the refcount we got in kgsl_process_init_sysfs */
+	kgsl_process_private_put(private);
+}
+
+/**
+ * kgsl_process_init_sysfs() - Initialize and create sysfs files for a process
+ *
+ * @device: Pointer to kgsl device struct
+ * @private: Pointer to the structure for the process
+ *
+ * kgsl_process_init_sysfs() is called at the time of creating the
+ * process struct when a process opens the kgsl device for the first time.
+ * This function creates the sysfs files for the process.
+ */
+void kgsl_process_init_sysfs(struct kgsl_device *device,
+		struct kgsl_process_private *private)
+{
+	unsigned char name[16];
+	int i;
+
+	/* Keep private valid until the sysfs entries are removed. */
+	kgsl_process_private_get(private);
+
+	snprintf(name, sizeof(name), "%d", private->pid);
+
+	if (kobject_init_and_add(&private->kobj, &ktype_mem_entry,
+		kgsl_driver.prockobj, name)) {
+		WARN(1, "Unable to add sysfs dir '%s'\n", name);
+		return;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(mem_stats); i++) {
+		if (sysfs_create_file(&private->kobj,
+			&mem_stats[i].attr.attr))
+			WARN(1, "Couldn't create sysfs file '%s'\n",
+				mem_stats[i].attr.attr.name);
+
+		if (sysfs_create_file(&private->kobj,
+			&mem_stats[i].max_attr.attr))
+			WARN(1, "Couldn't create sysfs file '%s'\n",
+				mem_stats[i].max_attr.attr.name);
+
+	}
+}
+
+static ssize_t kgsl_drv_memstat_show(struct device *dev,
+				 struct device_attribute *attr,
+				 char *buf)
+{
+	uint64_t val = 0;
+
+	if (!strcmp(attr->attr.name, "vmalloc"))
+		val = atomic_long_read(&kgsl_driver.stats.vmalloc);
+	else if (!strcmp(attr->attr.name, "vmalloc_max"))
+		val = atomic_long_read(&kgsl_driver.stats.vmalloc_max);
+	else if (!strcmp(attr->attr.name, "page_alloc"))
+		val = atomic_long_read(&kgsl_driver.stats.page_alloc);
+	else if (!strcmp(attr->attr.name, "page_alloc_max"))
+		val = atomic_long_read(&kgsl_driver.stats.page_alloc_max);
+	else if (!strcmp(attr->attr.name, "coherent"))
+		val = atomic_long_read(&kgsl_driver.stats.coherent);
+	else if (!strcmp(attr->attr.name, "coherent_max"))
+		val = atomic_long_read(&kgsl_driver.stats.coherent_max);
+	else if (!strcmp(attr->attr.name, "secure"))
+		val = atomic_long_read(&kgsl_driver.stats.secure);
+	else if (!strcmp(attr->attr.name, "secure_max"))
+		val = atomic_long_read(&kgsl_driver.stats.secure_max);
+	else if (!strcmp(attr->attr.name, "mapped"))
+		val = atomic_long_read(&kgsl_driver.stats.mapped);
+	else if (!strcmp(attr->attr.name, "mapped_max"))
+		val = atomic_long_read(&kgsl_driver.stats.mapped_max);
+
+	return snprintf(buf, PAGE_SIZE, "%llu\n", val);
+}
+
+static ssize_t kgsl_drv_full_cache_threshold_store(struct device *dev,
+					 struct device_attribute *attr,
+					 const char *buf, size_t count)
+{
+	int ret;
+	unsigned int thresh = 0;
+
+	ret = kgsl_sysfs_store(buf, &thresh);
+	if (ret)
+		return ret;
+
+	kgsl_driver.full_cache_threshold = thresh;
+	return count;
+}
+
+static ssize_t kgsl_drv_full_cache_threshold_show(struct device *dev,
+					struct device_attribute *attr,
+					char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%d\n",
+			kgsl_driver.full_cache_threshold);
+}
+
+static DEVICE_ATTR(vmalloc, 0444, kgsl_drv_memstat_show, NULL);
+static DEVICE_ATTR(vmalloc_max, 0444, kgsl_drv_memstat_show, NULL);
+static DEVICE_ATTR(page_alloc, 0444, kgsl_drv_memstat_show, NULL);
+static DEVICE_ATTR(page_alloc_max, 0444, kgsl_drv_memstat_show, NULL);
+static DEVICE_ATTR(coherent, 0444, kgsl_drv_memstat_show, NULL);
+static DEVICE_ATTR(coherent_max, 0444, kgsl_drv_memstat_show, NULL);
+static DEVICE_ATTR(secure, 0444, kgsl_drv_memstat_show, NULL);
+static DEVICE_ATTR(secure_max, 0444, kgsl_drv_memstat_show, NULL);
+static DEVICE_ATTR(mapped, 0444, kgsl_drv_memstat_show, NULL);
+static DEVICE_ATTR(mapped_max, 0444, kgsl_drv_memstat_show, NULL);
+static DEVICE_ATTR(full_cache_threshold, 0644,
+		kgsl_drv_full_cache_threshold_show,
+		kgsl_drv_full_cache_threshold_store);
+
+static const struct device_attribute *drv_attr_list[] = {
+	&dev_attr_vmalloc,
+	&dev_attr_vmalloc_max,
+	&dev_attr_page_alloc,
+	&dev_attr_page_alloc_max,
+	&dev_attr_coherent,
+	&dev_attr_coherent_max,
+	&dev_attr_secure,
+	&dev_attr_secure_max,
+	&dev_attr_mapped,
+	&dev_attr_mapped_max,
+	&dev_attr_full_cache_threshold,
+	NULL
+};
+
+void
+kgsl_sharedmem_uninit_sysfs(void)
+{
+	kgsl_remove_device_sysfs_files(&kgsl_driver.virtdev, drv_attr_list);
+}
+
+int
+kgsl_sharedmem_init_sysfs(void)
+{
+	return kgsl_create_device_sysfs_files(&kgsl_driver.virtdev,
+		drv_attr_list);
+}
+
+static int kgsl_cma_alloc_secure(struct kgsl_device *device,
+			struct kgsl_memdesc *memdesc, uint64_t size);
+
+static int kgsl_allocate_secure(struct kgsl_device *device,
+				struct kgsl_memdesc *memdesc,
+				uint64_t size)
+{
+	int ret;
+
+	if (MMU_FEATURE(&device->mmu, KGSL_MMU_HYP_SECURE_ALLOC))
+		ret = kgsl_sharedmem_page_alloc_user(memdesc, size);
+	else
+		ret = kgsl_cma_alloc_secure(device, memdesc, size);
+
+	return ret;
+}
+
+int kgsl_allocate_user(struct kgsl_device *device,
+		struct kgsl_memdesc *memdesc,
+		uint64_t size, uint64_t flags)
+{
+	int ret;
+
+	memdesc->flags = flags;
+
+	if (kgsl_mmu_get_mmutype(device) == KGSL_MMU_TYPE_NONE)
+		ret = kgsl_sharedmem_alloc_contig(device, memdesc, size);
+	else if (flags & KGSL_MEMFLAGS_SECURE)
+		ret = kgsl_allocate_secure(device, memdesc, size);
+	else
+		ret = kgsl_sharedmem_page_alloc_user(memdesc, size);
+
+	return ret;
+}
+
+static int kgsl_page_alloc_vmfault(struct kgsl_memdesc *memdesc,
+				struct vm_area_struct *vma,
+				struct vm_fault *vmf)
+{
+	int pgoff;
+	unsigned int offset;
+
+	offset = ((unsigned long) vmf->virtual_address - vma->vm_start);
+
+	if (offset >= memdesc->size)
+		return VM_FAULT_SIGBUS;
+
+	pgoff = offset >> PAGE_SHIFT;
+
+	if (pgoff < memdesc->page_count) {
+		struct page *page = memdesc->pages[pgoff];
+
+		get_page(page);
+		vmf->page = page;
+
+		memdesc->mapsize += PAGE_SIZE;
+
+		return 0;
+	}
+
+	return VM_FAULT_SIGBUS;
+}
+
+/*
+ * kgsl_page_alloc_unmap_kernel() - Unmap the memory in memdesc
+ *
+ * @memdesc: The memory descriptor which contains information about the memory
+ *
+ * Unmaps the memory mapped into kernel address space
+ */
+static void kgsl_page_alloc_unmap_kernel(struct kgsl_memdesc *memdesc)
+{
+	mutex_lock(&kernel_map_global_lock);
+	if (!memdesc->hostptr) {
+		/* If already unmapped the refcount should be 0 */
+		WARN_ON(memdesc->hostptr_count);
+		goto done;
+	}
+	memdesc->hostptr_count--;
+	if (memdesc->hostptr_count)
+		goto done;
+	vunmap(memdesc->hostptr);
+
+	atomic_long_sub(memdesc->size, &kgsl_driver.stats.vmalloc);
+	memdesc->hostptr = NULL;
+done:
+	mutex_unlock(&kernel_map_global_lock);
+}
+
+static void kgsl_page_alloc_free(struct kgsl_memdesc *memdesc)
+{
+	kgsl_page_alloc_unmap_kernel(memdesc);
+	/* we certainly do not expect the hostptr to still be mapped */
+	BUG_ON(memdesc->hostptr);
+
+	/* Secure buffers need to be unlocked before being freed */
+	if (memdesc->priv & KGSL_MEMDESC_TZ_LOCKED) {
+		int ret;
+		int dest_perms = PERM_READ | PERM_WRITE | PERM_EXEC;
+		int source_vm = VMID_CP_PIXEL;
+		int dest_vm = VMID_HLOS;
+
+		ret = hyp_assign_table(memdesc->sgt, &source_vm, 1,
+					&dest_vm, &dest_perms, 1);
+		if (ret) {
+			pr_err("Secure buf unlock failed: gpuaddr: %llx size: %llx ret: %d\n",
+					memdesc->gpuaddr, memdesc->size, ret);
+			BUG();
+		}
+
+		atomic_long_sub(memdesc->size, &kgsl_driver.stats.secure);
+	} else {
+		atomic_long_sub(memdesc->size, &kgsl_driver.stats.page_alloc);
+	}
+
+	if (memdesc->priv & KGSL_MEMDESC_TZ_LOCKED) {
+		struct sg_page_iter sg_iter;
+
+		for_each_sg_page(memdesc->sgt->sgl, &sg_iter,
+					memdesc->sgt->nents, 0)
+			ClearPagePrivate(sg_page_iter_page(&sg_iter));
+
+	}
+
+	/* Free pages using the pages array for non secure paged memory */
+	if (memdesc->pages != NULL)
+		kgsl_pool_free_pages(memdesc->pages, memdesc->page_count);
+	else
+		kgsl_pool_free_sgt(memdesc->sgt);
+
+}
+
+/*
+ * kgsl_page_alloc_map_kernel - Map the memory in memdesc to kernel address
+ * space
+ *
+ * @memdesc - The memory descriptor which contains information about the memory
+ *
+ * Return: 0 on success else error code
+ */
+static int kgsl_page_alloc_map_kernel(struct kgsl_memdesc *memdesc)
+{
+	int ret = 0;
+
+	/* Sanity check - don't map more than we could possibly chew */
+	if (memdesc->size > ULONG_MAX)
+		return -ENOMEM;
+
+	mutex_lock(&kernel_map_global_lock);
+	if ((!memdesc->hostptr) && (memdesc->pages != NULL)) {
+		pgprot_t page_prot = pgprot_writecombine(PAGE_KERNEL);
+
+		memdesc->hostptr = vmap(memdesc->pages, memdesc->page_count,
+					VM_IOREMAP, page_prot);
+		if (memdesc->hostptr)
+			KGSL_STATS_ADD(memdesc->size,
+				&kgsl_driver.stats.vmalloc,
+				&kgsl_driver.stats.vmalloc_max);
+		else
+			ret = -ENOMEM;
+	}
+	if (memdesc->hostptr)
+		memdesc->hostptr_count++;
+
+	mutex_unlock(&kernel_map_global_lock);
+
+	return ret;
+}
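kgsl_page_alloc_map_kernel()/..._unmap_kernel() refcount the shared vmap through hostptr_count, so nested users are safe as long as calls are balanced. A hedged sketch of the balanced pattern via the memdesc ops; the helper name is hypothetical and the driver normally goes through its kgsl_memdesc_map() wrapper:

/* Illustrative only: balanced map/unmap around a transient CPU read. */
static int example_peek_first_word(struct kgsl_memdesc *memdesc,
				   unsigned int *out)
{
	int ret = memdesc->ops->map_kernel(memdesc);

	if (ret)
		return ret;

	*out = *(unsigned int *)memdesc->hostptr;
	memdesc->ops->unmap_kernel(memdesc);	/* drops hostptr_count */

	return 0;
}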
+
+static int kgsl_contiguous_vmfault(struct kgsl_memdesc *memdesc,
+				struct vm_area_struct *vma,
+				struct vm_fault *vmf)
+{
+	unsigned long offset, pfn;
+	int ret;
+
+	offset = ((unsigned long) vmf->virtual_address - vma->vm_start) >>
+		PAGE_SHIFT;
+
+	pfn = (memdesc->physaddr >> PAGE_SHIFT) + offset;
+	ret = vm_insert_pfn(vma, (unsigned long) vmf->virtual_address, pfn);
+
+	if (ret == -ENOMEM || ret == -EAGAIN)
+		return VM_FAULT_OOM;
+	else if (ret == -EFAULT)
+		return VM_FAULT_SIGBUS;
+
+	memdesc->mapsize += PAGE_SIZE;
+
+	return VM_FAULT_NOPAGE;
+}
+
+static void kgsl_cma_coherent_free(struct kgsl_memdesc *memdesc)
+{
+	unsigned long attrs = 0;
+
+	if (memdesc->hostptr) {
+		if (memdesc->priv & KGSL_MEMDESC_SECURE) {
+			atomic_long_sub(memdesc->size,
+				&kgsl_driver.stats.secure);
+
+			kgsl_cma_unlock_secure(memdesc);
+			attrs = (unsigned long)&memdesc->attrs;
+		} else
+			atomic_long_sub(memdesc->size,
+				&kgsl_driver.stats.coherent);
+
+		dma_free_attrs(memdesc->dev, (size_t) memdesc->size,
+			memdesc->hostptr, memdesc->physaddr, attrs);
+	}
+}
+
+/* Global */
+static struct kgsl_memdesc_ops kgsl_page_alloc_ops = {
+	.free = kgsl_page_alloc_free,
+	.vmflags = VM_DONTDUMP | VM_DONTEXPAND | VM_DONTCOPY,
+	.vmfault = kgsl_page_alloc_vmfault,
+	.map_kernel = kgsl_page_alloc_map_kernel,
+	.unmap_kernel = kgsl_page_alloc_unmap_kernel,
+};
+
+/* CMA ops - used during NOMMU mode */
+static struct kgsl_memdesc_ops kgsl_cma_ops = {
+	.free = kgsl_cma_coherent_free,
+	.vmflags = VM_DONTDUMP | VM_PFNMAP | VM_DONTEXPAND | VM_DONTCOPY,
+	.vmfault = kgsl_contiguous_vmfault,
+};
+
+#ifdef CONFIG_ARM64
+/*
+ * For security reasons, ARMv8 doesn't allow an invalidate-only operation on
+ * a read-only mapping. It would be performance prohibitive to read the
+ * permissions on the buffer before the operation. No use case that we have
+ * found assumes that an invalidate operation is invalidate-only, so we feel
+ * comfortable turning invalidates into flushes for these targets.
+ */
+static inline unsigned int _fixup_cache_range_op(unsigned int op)
+{
+	if (op == KGSL_CACHE_OP_INV)
+		return KGSL_CACHE_OP_FLUSH;
+	return op;
+}
+#else
+static inline unsigned int _fixup_cache_range_op(unsigned int op)
+{
+	return op;
+}
+#endif
+
+int kgsl_cache_range_op(struct kgsl_memdesc *memdesc, uint64_t offset,
+		uint64_t size, unsigned int op)
+{
+	/*
+	 * If the buffer is mapped in the kernel, operate on that address;
+	 * otherwise use the user address
+	 */
+
+	void *addr = (memdesc->hostptr) ?
+		memdesc->hostptr : (void *) memdesc->useraddr;
+
+	if (size == 0 || size > UINT_MAX)
+		return -EINVAL;
+
+	/* Make sure that the offset + size does not overflow */
+	if ((offset + size < offset) || (offset + size < size))
+		return -ERANGE;
+
+	/* Make sure the offset + size do not overflow the address */
+	if (addr + ((size_t) offset + (size_t) size) < addr)
+		return -ERANGE;
+
+	/* Check that offset+length does not exceed memdesc->size */
+	if (offset + size > memdesc->size)
+		return -ERANGE;
+
+	/* Return quietly if the buffer isn't mapped on the CPU */
+	if (addr == NULL)
+		return 0;
+
+	addr = addr + offset;
+
+	/*
+	 * The dmac_xxx_range functions correctly handle addresses and sizes
+	 * that are not aligned to the cacheline size.
+	 */
+
+	switch (_fixup_cache_range_op(op)) {
+	case KGSL_CACHE_OP_FLUSH:
+		dmac_flush_range(addr, addr + (size_t) size);
+		break;
+	case KGSL_CACHE_OP_CLEAN:
+		dmac_clean_range(addr, addr + (size_t) size);
+		break;
+	case KGSL_CACHE_OP_INV:
+		dmac_inv_range(addr, addr + (size_t) size);
+		break;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(kgsl_cache_range_op);
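kgsl_cache_range_op() validates the offset and size against the memdesc and quietly succeeds when the buffer has no CPU mapping. A hedged usage sketch; the wrapper name is hypothetical:

/* Illustrative only: flush one dword the CPU just wrote before the GPU
 * reads it. */
static int example_flush_dword(struct kgsl_memdesc *memdesc, uint64_t offset)
{
	return kgsl_cache_range_op(memdesc, offset, sizeof(unsigned int),
				   KGSL_CACHE_OP_FLUSH);
}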
+
+#ifndef CONFIG_ALLOC_BUFFERS_IN_4K_CHUNKS
+static inline int get_page_size(size_t size, unsigned int align)
+{
+	if (align >= ilog2(SZ_1M) && size >= SZ_1M)
+		return SZ_1M;
+	else if (align >= ilog2(SZ_64K) && size >= SZ_64K)
+		return SZ_64K;
+	else if (align >= ilog2(SZ_8K) && size >= SZ_8K)
+		return SZ_8K;
+	else
+		return PAGE_SIZE;
+}
+#else
+static inline int get_page_size(size_t size, unsigned int align)
+{
+	return PAGE_SIZE;
+}
+#endif
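For reference, with CONFIG_ALLOC_BUFFERS_IN_4K_CHUNKS disabled the helper picks the largest chunk size that both the alignment exponent and the remaining length allow. A few worked values (illustrative inputs, not taken from the driver):

/*
 * get_page_size(SZ_2M,  ilog2(SZ_1M))  -> SZ_1M
 * get_page_size(SZ_2M,  ilog2(SZ_64K)) -> SZ_64K
 * get_page_size(SZ_16K, ilog2(SZ_1M))  -> SZ_8K
 * get_page_size(SZ_4K,  ilog2(SZ_1M))  -> PAGE_SIZE
 */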
+
+int
+kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc,
+			uint64_t size)
+{
+	int ret = 0;
+	unsigned int j, page_size, len_alloc;
+	unsigned int pcount = 0;
+	size_t len;
+	unsigned int align;
+
+	size = PAGE_ALIGN(size);
+	if (size == 0 || size > UINT_MAX)
+		return -EINVAL;
+
+	align = (memdesc->flags & KGSL_MEMALIGN_MASK) >> KGSL_MEMALIGN_SHIFT;
+
+	page_size = get_page_size(size, align);
+
+	/*
+	 * The alignment cannot be less than the intended page size - it can
+	 * be larger, however, to accommodate hardware quirks
+	 */
+
+	if (align < ilog2(page_size)) {
+		kgsl_memdesc_set_align(memdesc, ilog2(page_size));
+		align = ilog2(page_size);
+	}
+
+	/*
+	 * There needs to be enough room in the page array to be able to
+	 * service the allocation entirely with PAGE_SIZE sized chunks
+	 */
+
+	len_alloc = PAGE_ALIGN(size) >> PAGE_SHIFT;
+
+	memdesc->ops = &kgsl_page_alloc_ops;
+
+	/*
+	 * Allocate space to store the list of pages. This is an array of
+	 * pointers so we can track 1024 pages per page of allocation.
+	 * Keep this array around for non global non secure buffers that
+	 * are allocated by kgsl. This helps with improving the vm fault
+	 * routine by finding the faulted page in constant time.
+	 */
+
+	memdesc->pages = kgsl_malloc(len_alloc * sizeof(struct page *));
+
+	if (memdesc->pages == NULL) {
+		ret = -ENOMEM;
+		goto done;
+	}
+
+	len = size;
+
+	while (len > 0) {
+		int page_count;
+
+		page_count = kgsl_pool_alloc_page(&page_size,
+					memdesc->pages + pcount,
+					len_alloc - pcount,
+					&align);
+		if (page_count <= 0) {
+			if (page_count == -EAGAIN)
+				continue;
+
+			/*
+			 * Update the memdesc size to reflect only what was
+			 * actually allocated, so the partial allocation can be
+			 * correctly freed in kgsl_sharedmem_free().
+			 */
+			memdesc->size = (size - len);
+
+			if (!sharedmem_noretry_flag)
+				KGSL_CORE_ERR(
+					"Out of memory: only allocated %lldKB of %lldKB requested\n",
+					(size - len) >> 10, size >> 10);
+
+			ret = -ENOMEM;
+			goto done;
+		}
+
+		pcount += page_count;
+		len -= page_size;
+		memdesc->size += page_size;
+		memdesc->page_count += page_count;
+
+		/* Get the needed page size for the next iteration */
+		page_size = get_page_size(len, align);
+	}
+
+	/* Call to the hypervisor to lock any secure buffer allocations */
+	if (memdesc->flags & KGSL_MEMFLAGS_SECURE) {
+		unsigned int i;
+		struct scatterlist *sg;
+		int dest_perms = PERM_READ | PERM_WRITE;
+		int source_vm = VMID_HLOS;
+		int dest_vm = VMID_CP_PIXEL;
+
+		memdesc->sgt = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
+		if (memdesc->sgt == NULL) {
+			ret = -ENOMEM;
+			goto done;
+		}
+
+		ret = sg_alloc_table_from_pages(memdesc->sgt, memdesc->pages,
+			memdesc->page_count, 0, memdesc->size, GFP_KERNEL);
+		if (ret) {
+			kfree(memdesc->sgt);
+			goto done;
+		}
+
+		ret = hyp_assign_table(memdesc->sgt, &source_vm, 1,
+					&dest_vm, &dest_perms, 1);
+		if (ret) {
+			sg_free_table(memdesc->sgt);
+			kfree(memdesc->sgt);
+			memdesc->sgt = NULL;
+			goto done;
+		}
+
+		/* Set the private bit for each sg to indicate it is secured */
+		for_each_sg(memdesc->sgt->sgl, sg, memdesc->sgt->nents, i)
+			SetPagePrivate(sg_page(sg));
+
+		memdesc->priv |= KGSL_MEMDESC_TZ_LOCKED;
+
+		/* Record statistics */
+		KGSL_STATS_ADD(memdesc->size, &kgsl_driver.stats.secure,
+			&kgsl_driver.stats.secure_max);
+
+		/*
+		 * We don't need the array for secure buffers because they are
+		 * not mapped to the CPU
+		 */
+		kgsl_free(memdesc->pages);
+		memdesc->pages = NULL;
+		memdesc->page_count = 0;
+
+		/* Don't map and zero the locked secure buffer */
+		goto done;
+	}
+
+	KGSL_STATS_ADD(memdesc->size, &kgsl_driver.stats.page_alloc,
+		&kgsl_driver.stats.page_alloc_max);
+
+done:
+	if (ret) {
+		if (memdesc->pages) {
+			unsigned int count = 1;
+
+			for (j = 0; j < pcount; j += count) {
+				count = 1 << compound_order(memdesc->pages[j]);
+				kgsl_pool_free_page(memdesc->pages[j]);
+			}
+		}
+
+		kgsl_free(memdesc->pages);
+		memset(memdesc, 0, sizeof(*memdesc));
+	}
+
+	return ret;
+}
+
+void kgsl_sharedmem_free(struct kgsl_memdesc *memdesc)
+{
+	if (memdesc == NULL || memdesc->size == 0)
+		return;
+
+	/* Make sure the memory object has been unmapped */
+	kgsl_mmu_put_gpuaddr(memdesc);
+
+	if (memdesc->ops && memdesc->ops->free)
+		memdesc->ops->free(memdesc);
+
+	if (memdesc->sgt) {
+		sg_free_table(memdesc->sgt);
+		kfree(memdesc->sgt);
+	}
+
+	if (memdesc->pages)
+		kgsl_free(memdesc->pages);
+
+	memset(memdesc, 0, sizeof(*memdesc));
+}
+EXPORT_SYMBOL(kgsl_sharedmem_free);
+
+int
+kgsl_sharedmem_readl(const struct kgsl_memdesc *memdesc,
+			uint32_t *dst,
+			uint64_t offsetbytes)
+{
+	uint32_t *src;
+
+	if (WARN_ON(memdesc == NULL || memdesc->hostptr == NULL ||
+		dst == NULL))
+		return -EINVAL;
+
+	WARN_ON(offsetbytes % sizeof(uint32_t) != 0);
+	if (offsetbytes % sizeof(uint32_t) != 0)
+		return -EINVAL;
+
+	WARN_ON(offsetbytes > (memdesc->size - sizeof(uint32_t)));
+	if (offsetbytes > (memdesc->size - sizeof(uint32_t)))
+		return -ERANGE;
+
+	/*
+	 * We are reading shared memory between CPU and GPU.
+	 * Make sure reads before this are complete
+	 */
+	rmb();
+	src = (uint32_t *)(memdesc->hostptr + offsetbytes);
+	*dst = *src;
+	return 0;
+}
+EXPORT_SYMBOL(kgsl_sharedmem_readl);
+
+int
+kgsl_sharedmem_writel(struct kgsl_device *device,
+			const struct kgsl_memdesc *memdesc,
+			uint64_t offsetbytes,
+			uint32_t src)
+{
+	uint32_t *dst;
+
+	if (WARN_ON(memdesc == NULL || memdesc->hostptr == NULL))
+		return -EINVAL;
+
+	WARN_ON(offsetbytes % sizeof(uint32_t) != 0);
+	if (offsetbytes % sizeof(uint32_t) != 0)
+		return -EINVAL;
+
+	WARN_ON(offsetbytes > (memdesc->size - sizeof(uint32_t)));
+	if (offsetbytes > (memdesc->size - sizeof(uint32_t)))
+		return -ERANGE;
+	dst = (uint32_t *)(memdesc->hostptr + offsetbytes);
+	*dst = src;
+
+	/*
+	 * We are writing to shared memory between CPU and GPU.
+	 * Make sure write above is posted immediately
+	 */
+	wmb();
+
+	return 0;
+}
+EXPORT_SYMBOL(kgsl_sharedmem_writel);
+
+int
+kgsl_sharedmem_readq(const struct kgsl_memdesc *memdesc,
+			uint64_t *dst,
+			uint64_t offsetbytes)
+{
+	uint64_t *src;
+
+	if (WARN_ON(memdesc == NULL || memdesc->hostptr == NULL ||
+		dst == NULL))
+		return -EINVAL;
+
+	WARN_ON(offsetbytes % sizeof(uint32_t) != 0);
+	if (offsetbytes % sizeof(uint32_t) != 0)
+		return -EINVAL;
+
+	WARN_ON(offsetbytes > (memdesc->size - sizeof(uint32_t)));
+	if (offsetbytes > (memdesc->size - sizeof(uint32_t)))
+		return -ERANGE;
+
+	/*
+	 * We are reading shared memory between CPU and GPU.
+	 * Make sure reads before this are complete
+	 */
+	rmb();
+	src = (uint64_t *)(memdesc->hostptr + offsetbytes);
+	*dst = *src;
+	return 0;
+}
+EXPORT_SYMBOL(kgsl_sharedmem_readq);
+
+int
+kgsl_sharedmem_writeq(struct kgsl_device *device,
+			const struct kgsl_memdesc *memdesc,
+			uint64_t offsetbytes,
+			uint64_t src)
+{
+	uint64_t *dst;
+
+	if (WARN_ON(memdesc == NULL || memdesc->hostptr == NULL))
+		return -EINVAL;
+
+	WARN_ON(offsetbytes % sizeof(uint32_t) != 0);
+	if (offsetbytes % sizeof(uint32_t) != 0)
+		return -EINVAL;
+
+	WARN_ON(offsetbytes > (memdesc->size - sizeof(uint32_t)));
+	if (offsetbytes > (memdesc->size - sizeof(uint32_t)))
+		return -ERANGE;
+	dst = (uint64_t *)(memdesc->hostptr + offsetbytes);
+	*dst = src;
+
+	/*
+	 * We are writing to shared memory between CPU and GPU.
+	 * Make sure write above is posted immediately
+	 */
+	wmb();
+
+	return 0;
+}
+EXPORT_SYMBOL(kgsl_sharedmem_writeq);
+
+int
+kgsl_sharedmem_set(struct kgsl_device *device,
+		const struct kgsl_memdesc *memdesc, uint64_t offsetbytes,
+		unsigned int value, uint64_t sizebytes)
+{
+	if (WARN_ON(memdesc == NULL || memdesc->hostptr == NULL))
+		return -EINVAL;
+
+	if (WARN_ON(offsetbytes + sizebytes > memdesc->size))
+		return -EINVAL;
+
+	memset(memdesc->hostptr + offsetbytes, value, sizebytes);
+	return 0;
+}
+EXPORT_SYMBOL(kgsl_sharedmem_set);
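The read/write accessors above pair each access with the appropriate barrier so CPU and GPU views of shared memory stay ordered. A hedged sketch of a simple use; the offsets are hypothetical (the driver derives real ones from KGSL_MEMSTORE_OFFSET()) and the memdesc is assumed to be kernel-mapped:

/* Illustrative only: publish one value, then read back another. */
static void example_memstore_touch(struct kgsl_device *device,
				   struct kgsl_memdesc *memstore)
{
	uint32_t value;

	/* wmb() inside writel posts the store before any later GPU kick */
	kgsl_sharedmem_writel(device, memstore, 0, 0xdeadbeef);

	/* rmb() inside readl completes earlier reads before this load */
	kgsl_sharedmem_readl(memstore, &value, 4);
}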
+
+static const char * const memtype_str[] = {
+	[KGSL_MEMTYPE_OBJECTANY] = "any(0)",
+	[KGSL_MEMTYPE_FRAMEBUFFER] = "framebuffer",
+	[KGSL_MEMTYPE_RENDERBUFFER] = "renderbuffer",
+	[KGSL_MEMTYPE_ARRAYBUFFER] = "arraybuffer",
+	[KGSL_MEMTYPE_ELEMENTARRAYBUFFER] = "elementarraybuffer",
+	[KGSL_MEMTYPE_VERTEXARRAYBUFFER] = "vertexarraybuffer",
+	[KGSL_MEMTYPE_TEXTURE] = "texture",
+	[KGSL_MEMTYPE_SURFACE] = "surface",
+	[KGSL_MEMTYPE_EGL_SURFACE] = "egl_surface",
+	[KGSL_MEMTYPE_GL] = "gl",
+	[KGSL_MEMTYPE_CL] = "cl",
+	[KGSL_MEMTYPE_CL_BUFFER_MAP] = "cl_buffer_map",
+	[KGSL_MEMTYPE_CL_BUFFER_NOMAP] = "cl_buffer_nomap",
+	[KGSL_MEMTYPE_CL_IMAGE_MAP] = "cl_image_map",
+	[KGSL_MEMTYPE_CL_IMAGE_NOMAP] = "cl_image_nomap",
+	[KGSL_MEMTYPE_CL_KERNEL_STACK] = "cl_kernel_stack",
+	[KGSL_MEMTYPE_COMMAND] = "command",
+	[KGSL_MEMTYPE_2D] = "2d",
+	[KGSL_MEMTYPE_EGL_IMAGE] = "egl_image",
+	[KGSL_MEMTYPE_EGL_SHADOW] = "egl_shadow",
+	[KGSL_MEMTYPE_MULTISAMPLE] = "egl_multisample",
+	/* KGSL_MEMTYPE_KERNEL handled below, to avoid huge array */
+};
+
+void kgsl_get_memory_usage(char *name, size_t name_size, uint64_t memflags)
+{
+	unsigned int type = MEMFLAGS(memflags, KGSL_MEMTYPE_MASK,
+		KGSL_MEMTYPE_SHIFT);
+
+	if (type == KGSL_MEMTYPE_KERNEL)
+		strlcpy(name, "kernel", name_size);
+	else if (type < ARRAY_SIZE(memtype_str) && memtype_str[type] != NULL)
+		strlcpy(name, memtype_str[type], name_size);
+	else
+		snprintf(name, name_size, "unknown(%3d)", type);
+}
+EXPORT_SYMBOL(kgsl_get_memory_usage);
+
+int kgsl_sharedmem_alloc_contig(struct kgsl_device *device,
+			struct kgsl_memdesc *memdesc, uint64_t size)
+{
+	int result = 0;
+
+	size = PAGE_ALIGN(size);
+	if (size == 0 || size > SIZE_MAX)
+		return -EINVAL;
+
+	memdesc->size = size;
+	memdesc->ops = &kgsl_cma_ops;
+	memdesc->dev = device->dev->parent;
+
+	memdesc->hostptr = dma_alloc_attrs(memdesc->dev, (size_t) size,
+		&memdesc->physaddr, GFP_KERNEL, 0);
+
+	if (memdesc->hostptr == NULL) {
+		result = -ENOMEM;
+		goto err;
+	}
+
+	result = memdesc_sg_dma(memdesc, memdesc->physaddr, size);
+	if (result)
+		goto err;
+
+	if (kgsl_mmu_get_mmutype(device) == KGSL_MMU_TYPE_NONE)
+		memdesc->gpuaddr = memdesc->physaddr;
+
+	/* Record statistics */
+	KGSL_STATS_ADD(size, &kgsl_driver.stats.coherent,
+		&kgsl_driver.stats.coherent_max);
+
+err:
+	if (result)
+		kgsl_sharedmem_free(memdesc);
+
+	return result;
+}
+EXPORT_SYMBOL(kgsl_sharedmem_alloc_contig);
+
+static int scm_lock_chunk(struct kgsl_memdesc *memdesc, int lock)
+{
+	struct cp2_lock_req request;
+	unsigned int resp;
+	unsigned int *chunk_list;
+	struct scm_desc desc = {0};
+	int result;
+
+	/*
+	 * Flush the virt addr range before sending the memory to the
+	 * secure environment to ensure the data is actually present
+	 * in RAM
+	 *
+	 * Chunk_list holds the physical address of secure memory.
+	 * Pass in the virtual address of chunk_list to flush.
+	 * Chunk_list size is 1 because secure memory is physically
+	 * contiguous.
+	 */
+	chunk_list = kzalloc(sizeof(unsigned int), GFP_KERNEL);
+	if (!chunk_list)
+		return -ENOMEM;
+
+	chunk_list[0] = memdesc->physaddr;
+	dmac_flush_range((void *)chunk_list, (void *)chunk_list + 1);
+
+	request.chunks.chunk_list = virt_to_phys(chunk_list);
+	/*
+	 * virt_to_phys(chunk_list) may be an address > 4GB. It is guaranteed
+	 * that when using scm_call (the older interface), the phys addresses
+	 * will be restricted to below 4GB.
+	 */
+	desc.args[0] = virt_to_phys(chunk_list);
+	desc.args[1] = request.chunks.chunk_list_size = 1;
+	desc.args[2] = request.chunks.chunk_size = (unsigned int) memdesc->size;
+	desc.args[3] = request.mem_usage = 0;
+	desc.args[4] = request.lock = lock;
+	desc.args[5] = 0;
+	desc.arginfo = SCM_ARGS(6, SCM_RW, SCM_VAL, SCM_VAL, SCM_VAL, SCM_VAL,
+				SCM_VAL);
+	kmap_flush_unused();
+	kmap_atomic_flush_unused();
+	if (!is_scm_armv8()) {
+		result = scm_call(SCM_SVC_MP, MEM_PROTECT_LOCK_ID2,
+				&request, sizeof(request), &resp, sizeof(resp));
+	} else {
+		result = scm_call2(SCM_SIP_FNID(SCM_SVC_MP,
+				   MEM_PROTECT_LOCK_ID2_FLAT), &desc);
+		resp = desc.ret[0];
+	}
+
+	kfree(chunk_list);
+	return result;
+}
+
+static int kgsl_cma_alloc_secure(struct kgsl_device *device,
+			struct kgsl_memdesc *memdesc, uint64_t size)
+{
+	struct kgsl_iommu *iommu = KGSL_IOMMU_PRIV(device);
+	int result = 0;
+	size_t aligned;
+
+	/* Align size to 1M boundaries */
+	aligned = ALIGN(size, SZ_1M);
+
+	/* The SCM call uses an unsigned int for the size */
+	if (aligned == 0 || aligned > UINT_MAX)
+		return -EINVAL;
+
+	/*
+	 * If there is more than a page gap between the requested size and the
+	 * aligned size we don't need to add more memory for a guard page. Yay!
+	 */
+
+	if (memdesc->priv & KGSL_MEMDESC_GUARD_PAGE)
+		if (aligned - size >= SZ_4K)
+			memdesc->priv &= ~KGSL_MEMDESC_GUARD_PAGE;
+
+	memdesc->size = aligned;
+	memdesc->ops = &kgsl_cma_ops;
+	memdesc->dev = iommu->ctx[KGSL_IOMMU_CONTEXT_SECURE].dev;
+
+	memdesc->attrs |= DMA_ATTR_STRONGLY_ORDERED;
+
+	memdesc->hostptr = dma_alloc_attrs(memdesc->dev, aligned,
+		&memdesc->physaddr, GFP_KERNEL, memdesc->attrs);
+
+	if (memdesc->hostptr == NULL) {
+		result = -ENOMEM;
+		goto err;
+	}
+
+	result = memdesc_sg_dma(memdesc, memdesc->physaddr, aligned);
+	if (result)
+		goto err;
+
+	result = scm_lock_chunk(memdesc, 1);
+
+	if (result != 0)
+		goto err;
+
+	/* Set the private bit to indicate that we've secured this */
+	SetPagePrivate(sg_page(memdesc->sgt->sgl));
+
+	memdesc->priv |= KGSL_MEMDESC_TZ_LOCKED;
+
+	/* Record statistics */
+	KGSL_STATS_ADD(aligned, &kgsl_driver.stats.secure,
+	       &kgsl_driver.stats.secure_max);
+err:
+	if (result)
+		kgsl_sharedmem_free(memdesc);
+
+	return result;
+}
+
+/**
+ * kgsl_cma_unlock_secure() - Unlock secure memory by calling TZ
+ * @memdesc: memory descriptor
+ */
+static void kgsl_cma_unlock_secure(struct kgsl_memdesc *memdesc)
+{
+	if (memdesc->size == 0 || !(memdesc->priv & KGSL_MEMDESC_TZ_LOCKED))
+		return;
+
+	if (!scm_lock_chunk(memdesc, 0))
+		ClearPagePrivate(sg_page(memdesc->sgt->sgl));
+}
+
+void kgsl_sharedmem_set_noretry(bool val)
+{
+	sharedmem_noretry_flag = val;
+}
+
+bool kgsl_sharedmem_get_noretry(void)
+{
+	return sharedmem_noretry_flag;
+}
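kgsl_sharedmem_set_noretry()/get_noretry() exist so debugfs can flip the flag described at the top of this file. A hedged sketch of a debugfs hook wiring them up; the names are hypothetical and the real attribute lives in the driver's debugfs code:

#include <linux/debugfs.h>

/* Illustrative only: back a debugfs bool with the noretry flag. */
static int example_noretry_set(void *data, u64 val)
{
	kgsl_sharedmem_set_noretry(val != 0);
	return 0;
}

static int example_noretry_get(void *data, u64 *val)
{
	*val = kgsl_sharedmem_get_noretry() ? 1 : 0;
	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(example_noretry_fops, example_noretry_get,
			example_noretry_set, "%llu\n");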
diff --git a/drivers/gpu/msm/kgsl_sharedmem.h b/drivers/gpu/msm/kgsl_sharedmem.h
new file mode 100644
index 0000000..1ef31ef
--- /dev/null
+++ b/drivers/gpu/msm/kgsl_sharedmem.h
@@ -0,0 +1,368 @@
+/* Copyright (c) 2002,2007-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef __KGSL_SHAREDMEM_H
+#define __KGSL_SHAREDMEM_H
+
+#include <linux/dma-mapping.h>
+
+#include "kgsl_mmu.h"
+
+struct kgsl_device;
+struct kgsl_process_private;
+
+#define KGSL_CACHE_OP_INV       0x01
+#define KGSL_CACHE_OP_FLUSH     0x02
+#define KGSL_CACHE_OP_CLEAN     0x03
+
+int kgsl_sharedmem_alloc_contig(struct kgsl_device *device,
+			struct kgsl_memdesc *memdesc,
+			uint64_t size);
+
+void kgsl_sharedmem_free(struct kgsl_memdesc *memdesc);
+
+int kgsl_sharedmem_readl(const struct kgsl_memdesc *memdesc,
+			uint32_t *dst,
+			uint64_t offsetbytes);
+
+int kgsl_sharedmem_writel(struct kgsl_device *device,
+			const struct kgsl_memdesc *memdesc,
+			uint64_t offsetbytes,
+			uint32_t src);
+
+int kgsl_sharedmem_readq(const struct kgsl_memdesc *memdesc,
+			uint64_t *dst,
+			uint64_t offsetbytes);
+
+int kgsl_sharedmem_writeq(struct kgsl_device *device,
+			const struct kgsl_memdesc *memdesc,
+			uint64_t offsetbytes,
+			uint64_t src);
+
+int kgsl_sharedmem_set(struct kgsl_device *device,
+			const struct kgsl_memdesc *memdesc,
+			uint64_t offsetbytes, unsigned int value,
+			uint64_t sizebytes);
+
+int kgsl_cache_range_op(struct kgsl_memdesc *memdesc,
+			uint64_t offset, uint64_t size,
+			unsigned int op);
+
+void kgsl_process_init_sysfs(struct kgsl_device *device,
+		struct kgsl_process_private *private);
+void kgsl_process_uninit_sysfs(struct kgsl_process_private *private);
+
+int kgsl_sharedmem_init_sysfs(void);
+void kgsl_sharedmem_uninit_sysfs(void);
+
+int kgsl_allocate_user(struct kgsl_device *device,
+		struct kgsl_memdesc *memdesc,
+		uint64_t size, uint64_t flags);
+
+void kgsl_get_memory_usage(char *str, size_t len, uint64_t memflags);
+
+int kgsl_sharedmem_page_alloc_user(struct kgsl_memdesc *memdesc,
+				uint64_t size);
+
+#define MEMFLAGS(_flags, _mask, _shift) \
+	((unsigned int) (((_flags) & (_mask)) >> (_shift)))
+
+/*
+ * kgsl_memdesc_get_align - Get alignment flags from a memdesc
+ * @memdesc - the memdesc
+ *
+ * Returns the alignment requested, as power of 2 exponent.
+ */
+static inline int
+kgsl_memdesc_get_align(const struct kgsl_memdesc *memdesc)
+{
+	return MEMFLAGS(memdesc->flags, KGSL_MEMALIGN_MASK,
+		KGSL_MEMALIGN_SHIFT);
+}
+
+/*
+ * kgsl_memdesc_get_pagesize - Get pagesize based on alignment
+ * @memdesc - the memdesc
+ *
+ * Returns the pagesize based on memdesc alignment
+ */
+static inline int
+kgsl_memdesc_get_pagesize(const struct kgsl_memdesc *memdesc)
+{
+	return (1 << kgsl_memdesc_get_align(memdesc));
+}
+
+/*
+ * kgsl_memdesc_get_cachemode - Get cache mode of a memdesc
+ * @memdesc: the memdesc
+ *
+ * Returns a KGSL_CACHEMODE* value.
+ */
+static inline int
+kgsl_memdesc_get_cachemode(const struct kgsl_memdesc *memdesc)
+{
+	return MEMFLAGS(memdesc->flags, KGSL_CACHEMODE_MASK,
+		KGSL_CACHEMODE_SHIFT);
+}
+
+static inline unsigned int
+kgsl_memdesc_get_memtype(const struct kgsl_memdesc *memdesc)
+{
+	return MEMFLAGS(memdesc->flags, KGSL_MEMTYPE_MASK,
+		KGSL_MEMTYPE_SHIFT);
+}
+/*
+ * kgsl_memdesc_set_align - Set alignment flags of a memdesc
+ * @memdesc - the memdesc
+ * @align - alignment requested, as a power of 2 exponent.
+ */
+static inline int
+kgsl_memdesc_set_align(struct kgsl_memdesc *memdesc, unsigned int align)
+{
+	if (align > 32)
+		align = 32;
+
+	memdesc->flags &= ~KGSL_MEMALIGN_MASK;
+	memdesc->flags |= (align << KGSL_MEMALIGN_SHIFT) & KGSL_MEMALIGN_MASK;
+	return 0;
+}
+
+/**
+ * kgsl_memdesc_usermem_type - return buffer type
+ * @memdesc - the memdesc
+ *
+ * Returns a KGSL_MEM_ENTRY_* value for this buffer, which
+ * identifies if was allocated by us, or imported from
+ * another allocator.
+ */
+static inline unsigned int
+kgsl_memdesc_usermem_type(const struct kgsl_memdesc *memdesc)
+{
+	return MEMFLAGS(memdesc->flags, KGSL_MEMFLAGS_USERMEM_MASK,
+		KGSL_MEMFLAGS_USERMEM_SHIFT);
+}
+
+/**
+ * memdesc_sg_dma() - Turn a dma_addr (from CMA) into a sg table
+ * @memdesc: Pointer to the memdesc structure
+ * @addr: Physical address from the dma_alloc function
+ * @size: Size of the chunk
+ *
+ * Create a sg table for the contiguous chunk specified by addr and size.
+ */
+static inline int
+memdesc_sg_dma(struct kgsl_memdesc *memdesc,
+		phys_addr_t addr, uint64_t size)
+{
+	int ret;
+	struct page *page = phys_to_page(addr);
+
+	memdesc->sgt = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
+	if (memdesc->sgt == NULL)
+		return -ENOMEM;
+
+	ret = sg_alloc_table(memdesc->sgt, 1, GFP_KERNEL);
+	if (ret) {
+		kfree(memdesc->sgt);
+		memdesc->sgt = NULL;
+		return ret;
+	}
+
+	sg_set_page(memdesc->sgt->sgl, page, (size_t) size, 0);
+	return 0;
+}
+
+/*
+ * kgsl_memdesc_is_global - is this a globally mapped buffer?
+ * @memdesc: the memdesc
+ *
+ * Returns nonzero if this is a global mapping, 0 otherwise
+ */
+static inline int kgsl_memdesc_is_global(const struct kgsl_memdesc *memdesc)
+{
+	return (memdesc->priv & KGSL_MEMDESC_GLOBAL) != 0;
+}
+
+/*
+ * kgsl_memdesc_is_secured - is this a secure buffer?
+ * @memdesc: the memdesc
+ *
+ * Returns true if this is a secure mapping, false otherwise
+ */
+static inline bool kgsl_memdesc_is_secured(const struct kgsl_memdesc *memdesc)
+{
+	return memdesc && (memdesc->priv & KGSL_MEMDESC_SECURE);
+}
+
+/*
+ * kgsl_memdesc_has_guard_page - is the last page a guard page?
+ * @memdesc - the memdesc
+ *
+ * Returns nonzero if there is a guard page, 0 otherwise
+ */
+static inline int
+kgsl_memdesc_has_guard_page(const struct kgsl_memdesc *memdesc)
+{
+	return (memdesc->priv & KGSL_MEMDESC_GUARD_PAGE) != 0;
+}
+
+/*
+ * kgsl_memdesc_guard_page_size - returns guard page size
+ * @memdesc - the memdesc
+ *
+ * Returns guard page size
+ */
+static inline uint64_t
+kgsl_memdesc_guard_page_size(const struct kgsl_memdesc *memdesc)
+{
+	if (!kgsl_memdesc_has_guard_page(memdesc))
+		return 0;
+
+	if (kgsl_memdesc_is_secured(memdesc)) {
+		if (memdesc->pagetable != NULL &&
+				memdesc->pagetable->mmu != NULL)
+			return memdesc->pagetable->mmu->secure_align_mask + 1;
+	}
+
+	return PAGE_SIZE;
+}
+
+/*
+ * kgsl_memdesc_use_cpu_map - use the same virtual mapping on CPU and GPU?
+ * @memdesc - the memdesc
+ */
+static inline int
+kgsl_memdesc_use_cpu_map(const struct kgsl_memdesc *memdesc)
+{
+	return (memdesc->flags & KGSL_MEMFLAGS_USE_CPU_MAP) != 0;
+}
+
+/*
+ * kgsl_memdesc_footprint - get the size of the mmap region
+ * @memdesc - the memdesc
+ *
+ * The entire memdesc must be mapped. Additionally if the
+ * CPU mapping is going to be mirrored, there must be room
+ * for the guard page to be mapped so that the address spaces
+ * match up.
+ */
+static inline uint64_t
+kgsl_memdesc_footprint(const struct kgsl_memdesc *memdesc)
+{
+	return  memdesc->size + kgsl_memdesc_guard_page_size(memdesc);
+}
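As a concrete reading of the two helpers above, with sizes that are assumed here purely for illustration:

/*
 * Illustrative footprints (values assumed, not taken from the driver):
 *   64K buffer, guard page, non-secure            -> 64K + PAGE_SIZE
 *   64K buffer, guard page, secure (mask 0xFFFFF) -> 64K + 1M
 *   64K buffer, no guard page                     -> 64K
 */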
+
+/*
+ * kgsl_allocate_global() - Allocate GPU accessible memory that will be global
+ * across all processes
+ * @device: The device pointer to which the memdesc belongs
+ * @memdesc: Pointer to a KGSL memory descriptor for the memory allocation
+ * @size: size of the allocation
+ * @flags: Allocation flags that control how the memory is mapped
+ * @priv: Priv flags that controls memory attributes
+ *
+ * Allocate contiguous memory for internal use and add the allocation to the
+ * list of global pagetable entries that will be mapped at the same address in
+ * all pagetables.  This is used for device-wide GPU allocations such as
+ * ringbuffers.
+ */
+static inline int kgsl_allocate_global(struct kgsl_device *device,
+	struct kgsl_memdesc *memdesc, uint64_t size, uint64_t flags,
+	unsigned int priv, const char *name)
+{
+	int ret;
+
+	memdesc->flags = flags;
+	memdesc->priv = priv;
+
+	if (((memdesc->priv & KGSL_MEMDESC_CONTIG) != 0) ||
+		(kgsl_mmu_get_mmutype(device) == KGSL_MMU_TYPE_NONE))
+		ret = kgsl_sharedmem_alloc_contig(device, memdesc,
+						(size_t) size);
+	else {
+		ret = kgsl_sharedmem_page_alloc_user(memdesc, (size_t) size);
+		if (ret == 0) {
+			if (kgsl_memdesc_map(memdesc) == NULL)
+				ret = -ENOMEM;
+		}
+	}
+
+	if (ret == 0)
+		kgsl_mmu_add_global(device, memdesc, name);
+
+	return ret;
+}
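A hedged usage sketch of kgsl_allocate_global(): allocating a small device-wide buffer that every pagetable will map at the same GPU address. The wrapper, field, and name string are hypothetical:

/* Illustrative only. */
static int example_alloc_scratch(struct kgsl_device *device,
				 struct kgsl_memdesc *scratch)
{
	return kgsl_allocate_global(device, scratch, PAGE_SIZE,
				    0, KGSL_MEMDESC_CONTIG, "example_scratch");
}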
+
+/**
+ * kgsl_free_global() - Free a device-wide GPU allocation and remove it from the
+ * global pagetable entry list
+ *
+ * @device: Pointer to the device
+ * @memdesc: Pointer to the GPU memory descriptor to free
+ *
+ * Remove the specific memory descriptor from the global pagetable entry list
+ * and free it
+ */
+static inline void kgsl_free_global(struct kgsl_device *device,
+		struct kgsl_memdesc *memdesc)
+{
+	kgsl_mmu_remove_global(device, memdesc);
+	kgsl_sharedmem_free(memdesc);
+}
+
+void kgsl_sharedmem_set_noretry(bool val);
+bool kgsl_sharedmem_get_noretry(void);
+
+/**
+ * kgsl_alloc_sgt_from_pages() - Allocate a sg table
+ *
+ * @m: memory descriptor of the allocation
+ *
+ * Allocate and return pointer to a sg table
+ */
+static inline struct sg_table *kgsl_alloc_sgt_from_pages(
+				struct kgsl_memdesc *m)
+{
+	int ret;
+	struct sg_table *sgt;
+
+	sgt = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
+	if (sgt == NULL)
+		return ERR_PTR(-ENOMEM);
+
+	ret = sg_alloc_table_from_pages(sgt, m->pages, m->page_count, 0,
+					m->size, GFP_KERNEL);
+	if (ret) {
+		kfree(sgt);
+		return ERR_PTR(ret);
+	}
+
+	return sgt;
+}
+
+/**
+ * kgsl_free_sgt() - Free a sg table structure
+ *
+ * @sgt: sg table pointer to be freed
+ *
+ * Free the sg table allocated using sgt and free the
+ * sgt structure itself
+ */
+static inline void kgsl_free_sgt(struct sg_table *sgt)
+{
+	if (sgt != NULL) {
+		sg_free_table(sgt);
+		kfree(sgt);
+	}
+}
+
+#endif /* __KGSL_SHAREDMEM_H */
diff --git a/drivers/gpu/msm/kgsl_snapshot.c b/drivers/gpu/msm/kgsl_snapshot.c
new file mode 100644
index 0000000..4b1b5bc
--- /dev/null
+++ b/drivers/gpu/msm/kgsl_snapshot.c
@@ -0,0 +1,1161 @@
+/* Copyright (c) 2012-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/export.h>
+#include <linux/time.h>
+#include <linux/sysfs.h>
+#include <linux/utsname.h>
+#include <linux/sched.h>
+#include <linux/idr.h>
+
+#include "kgsl.h"
+#include "kgsl_log.h"
+#include "kgsl_device.h"
+#include "kgsl_sharedmem.h"
+#include "kgsl_snapshot.h"
+#include "adreno_cp_parser.h"
+
+/* List node holding the IB object list that covers all objects in that IB */
+
+struct kgsl_snapshot_cp_obj {
+	struct adreno_ib_object_list *ib_obj_list;
+	struct list_head node;
+};
+
+struct snapshot_obj_itr {
+	u8 *buf;      /* Buffer pointer to write to */
+	int pos;        /* Current position in the sequence */
+	loff_t offset;  /* file offset to start writing from */
+	size_t remain;  /* Bytes remaining in buffer */
+	size_t write;   /* Bytes written so far */
+};
+
+static void obj_itr_init(struct snapshot_obj_itr *itr, u8 *buf,
+	loff_t offset, size_t remain)
+{
+	itr->buf = buf;
+	itr->offset = offset;
+	itr->remain = remain;
+	itr->pos = 0;
+	itr->write = 0;
+}
+
+static int obj_itr_out(struct snapshot_obj_itr *itr, void *src, int size)
+{
+	if (itr->remain == 0)
+		return 0;
+
+	if ((itr->pos + size) <= itr->offset)
+		goto done;
+
+	/* Handle the case that offset is in the middle of the buffer */
+
+	if (itr->offset > itr->pos) {
+		src += (itr->offset - itr->pos);
+		size -= (itr->offset - itr->pos);
+
+		/* Advance pos to the offset start */
+		itr->pos = itr->offset;
+	}
+
+	if (size > itr->remain)
+		size = itr->remain;
+
+	memcpy(itr->buf, src, size);
+
+	itr->buf += size;
+	itr->write += size;
+	itr->remain -= size;
+
+done:
+	itr->pos += size;
+	return size;
+}
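obj_itr_out() copies only the part of each chunk that falls inside the [offset, offset + remain) window, which is what lets userspace read the snapshot in arbitrary-sized chunks. A hedged sketch of feeding two sections through the iterator; the buffer and section names are hypothetical:

/* Illustrative only: emit a header then a payload into a partial read. */
static size_t example_fill_read(u8 *buf, loff_t off, size_t count,
				void *header, size_t header_size,
				void *payload, size_t payload_size)
{
	struct snapshot_obj_itr itr;

	obj_itr_init(&itr, buf, off, count);
	obj_itr_out(&itr, header, header_size);
	obj_itr_out(&itr, payload, payload_size);

	return itr.write;	/* bytes actually copied into buf */
}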
+
+/* idr_for_each function to count the number of contexts */
+
+static int snapshot_context_count(int id, void *ptr, void *data)
+{
+	int *count = data;
+	*count = *count + 1;
+
+	return 0;
+}
+
+/*
+ * To simplify the iterator loop use a global pointer instead of trying
+ * to pass around double star references to the snapshot data
+ */
+
+static u8 *_ctxtptr;
+
+static int snapshot_context_info(int id, void *ptr, void *data)
+{
+	struct kgsl_snapshot_linux_context_v2 *header =
+		(struct kgsl_snapshot_linux_context_v2 *)_ctxtptr;
+	struct kgsl_context *context = ptr;
+	struct kgsl_device *device;
+
+	device = context->device;
+
+	header->id = id;
+
+	/* Future-proof for per-context timestamps - for now, just
+	 * return the global timestamp for all contexts
+	 */
+
+	kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_QUEUED,
+		&header->timestamp_queued);
+	kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_CONSUMED,
+		&header->timestamp_consumed);
+	kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED,
+		&header->timestamp_retired);
+
+	_ctxtptr += sizeof(struct kgsl_snapshot_linux_context_v2);
+
+	return 0;
+}
+
+/* Snapshot the Linux specific information */
+static size_t snapshot_os(struct kgsl_device *device,
+	u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_linux_v2 *header =
+		(struct kgsl_snapshot_linux_v2 *)buf;
+	struct kgsl_pwrctrl *pwr = &device->pwrctrl;
+	int ctxtcount = 0;
+	size_t size = sizeof(*header);
+	struct kgsl_context *context;
+
+	/*
+	 * Figure out how many active contexts there are - these will
+	 * be appended on the end of the structure
+	 */
+
+	read_lock(&device->context_lock);
+	idr_for_each(&device->context_idr, snapshot_context_count, &ctxtcount);
+	read_unlock(&device->context_lock);
+
+	size += ctxtcount * sizeof(struct kgsl_snapshot_linux_context_v2);
+
+	/* Make sure there is enough room for the data */
+	if (remain < size) {
+		SNAPSHOT_ERR_NOMEM(device, "OS");
+		return 0;
+	}
+
+	memset(header, 0, sizeof(*header));
+
+	header->osid = KGSL_SNAPSHOT_OS_LINUX_V3;
+
+	/* Get the kernel build information */
+	strlcpy(header->release, utsname()->release, sizeof(header->release));
+	strlcpy(header->version, utsname()->version, sizeof(header->version));
+
+	/* Get the Unix time for the timestamp */
+	header->seconds = get_seconds();
+
+	/* Remember the power information */
+	header->power_flags = pwr->power_flags;
+	header->power_level = pwr->active_pwrlevel;
+	header->power_interval_timeout = pwr->interval_timeout;
+	header->grpclk = kgsl_get_clkrate(pwr->grp_clks[0]);
+
+	/*
+	 * Save the last active context from the global index since it's more
+	 * reliable than the current RB index
+	 */
+	kgsl_sharedmem_readl(&device->memstore, &header->current_context,
+		KGSL_MEMSTORE_OFFSET(KGSL_MEMSTORE_GLOBAL, current_context));
+
+	context = kgsl_context_get(device, header->current_context);
+
+	/* Get the current PT base */
+	header->ptbase = kgsl_mmu_get_current_ttbr0(&device->mmu);
+
+	/* And the PID for the task leader */
+	if (context) {
+		header->pid = context->tid;
+		strlcpy(header->comm, context->proc_priv->comm,
+				sizeof(header->comm));
+		kgsl_context_put(context);
+		context = NULL;
+	}
+
+	header->ctxtcount = ctxtcount;
+
+	_ctxtptr = buf + sizeof(*header);
+	/* append information for each context */
+
+	read_lock(&device->context_lock);
+	idr_for_each(&device->context_idr, snapshot_context_info, NULL);
+	read_unlock(&device->context_lock);
+
+	/* Return the size of the data segment */
+	return size;
+}
+
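+/*
+ * Remove the object from the snapshot list, clear its frozen flag and drop
+ * the reference that was taken on the mem entry when it was frozen.
+ */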
+static void kgsl_snapshot_put_object(struct kgsl_snapshot_object *obj)
+{
+	list_del(&obj->node);
+
+	obj->entry->memdesc.priv &= ~KGSL_MEMDESC_FROZEN;
+	kgsl_mem_entry_put(obj->entry);
+
+	kfree(obj);
+}
+
+/**
+ * kgsl_snapshot_have_object() - return 1 if the object has been processed
+ * @snapshot: the snapshot data
+ * @process: The process that owns the object to freeze
+ * @gpuaddr: The gpu address of the object to freeze
+ * @size: the size of the object (may not always be the size of the region)
+ *
+ * Return 1 if the object is already in the list - this can save us from
+ * having to parse the same thing over again. There are 2 lists that are
+ * tracking objects so check for the object in both lists
+ */
+int kgsl_snapshot_have_object(struct kgsl_snapshot *snapshot,
+	struct kgsl_process_private *process,
+	uint64_t gpuaddr, uint64_t size)
+{
+	struct kgsl_snapshot_object *obj;
+	struct kgsl_snapshot_cp_obj *obj_cp;
+	struct adreno_ib_object *ib_obj;
+	int i;
+
+	/* Check whether the object is tracked already in ib list */
+	list_for_each_entry(obj_cp, &snapshot->cp_list, node) {
+		if (obj_cp->ib_obj_list == NULL
+			|| obj_cp->ib_obj_list->num_objs == 0)
+			continue;
+
+		ib_obj = &(obj_cp->ib_obj_list->obj_list[0]);
+		if (ib_obj->entry == NULL || ib_obj->entry->priv != process)
+			continue;
+
+		for (i = 0; i < obj_cp->ib_obj_list->num_objs; i++) {
+			ib_obj = &(obj_cp->ib_obj_list->obj_list[i]);
+			if ((gpuaddr >= ib_obj->gpuaddr) &&
+				((gpuaddr + size) <=
+				(ib_obj->gpuaddr + ib_obj->size)))
+				return 1;
+		}
+	}
+
+	list_for_each_entry(obj, &snapshot->obj_list, node) {
+		if (obj->entry == NULL || obj->entry->priv != process)
+			continue;
+
+		if ((gpuaddr >= obj->gpuaddr) &&
+			((gpuaddr + size) <= (obj->gpuaddr + obj->size)))
+			return 1;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL(kgsl_snapshot_have_object);
+
+/**
+ * kgsl_snapshot_get_object() - Mark a GPU buffer to be frozen
+ * @snapshot: The snapshot data
+ * @process: The process that owns the object we want to freeze
+ * @gpuaddr: The gpu address of the object to freeze
+ * @size: the size of the object (may not always be the size of the region)
+ * @type: the type of object being saved (shader, vbo, etc)
+ *
+ * Mark and freeze a GPU buffer object.  This will prevent it from being
+ * freed until it can be copied out as part of the snapshot dump.  Returns the
+ * size of the object being frozen
+ */
+int kgsl_snapshot_get_object(struct kgsl_snapshot *snapshot,
+	struct kgsl_process_private *process, uint64_t gpuaddr,
+	uint64_t size, unsigned int type)
+{
+	struct kgsl_mem_entry *entry;
+	struct kgsl_snapshot_object *obj;
+	uint64_t offset;
+	int ret = -EINVAL;
+	unsigned int mem_type;
+
+	if (!gpuaddr)
+		return 0;
+
+	entry = kgsl_sharedmem_find(process, gpuaddr);
+
+	if (entry == NULL) {
+		KGSL_CORE_ERR("Unable to find GPU buffer 0x%016llX\n", gpuaddr);
+		return -EINVAL;
+	}
+
+	/* We can't freeze external memory, because we don't own it */
+	if (entry->memdesc.flags & KGSL_MEMFLAGS_USERMEM_MASK)
+		goto err_put;
+	/*
+	 * Do not save texture and render targets in snapshot,
+	 * they can be just too big
+	 */
+
+	mem_type = kgsl_memdesc_get_memtype(&entry->memdesc);
+	if (mem_type == KGSL_MEMTYPE_TEXTURE ||
+		mem_type == KGSL_MEMTYPE_EGL_SURFACE ||
+		mem_type == KGSL_MEMTYPE_EGL_IMAGE) {
+		ret = 0;
+		goto err_put;
+	}
+
+	/* Do not save sparse memory */
+	if (entry->memdesc.flags & KGSL_MEMFLAGS_SPARSE_VIRT ||
+			entry->memdesc.flags & KGSL_MEMFLAGS_SPARSE_PHYS) {
+		ret = 0;
+		goto err_put;
+	}
+
+	/*
+	 * size indicates the number of bytes in the region to save. This might
+	 * not always be the entire size of the region because some buffers are
+	 * sub-allocated from a larger region.  However, if size 0 was passed,
+	 * that's a flag that the caller wants to capture the entire buffer
+	 */
+
+	if (size == 0) {
+		size = entry->memdesc.size;
+		offset = 0;
+
+		/* Adjust the gpuaddr to the start of the object */
+		gpuaddr = entry->memdesc.gpuaddr;
+	} else {
+		offset = gpuaddr - entry->memdesc.gpuaddr;
+	}
+
+	if (size + offset > entry->memdesc.size) {
+		KGSL_CORE_ERR("Invalid size for GPU buffer 0x%016llX\n",
+			gpuaddr);
+		goto err_put;
+	}
+
+	/* If the buffer is already on the list, skip it */
+	list_for_each_entry(obj, &snapshot->obj_list, node) {
+		/* combine the range with existing object if they overlap */
+		if (obj->entry->priv == process && obj->type == type &&
+			kgsl_addr_range_overlap(obj->gpuaddr, obj->size,
+				gpuaddr, size)) {
+			uint64_t end1 = obj->gpuaddr + obj->size;
+			uint64_t end2 = gpuaddr + size;
+
+			if (obj->gpuaddr > gpuaddr)
+				obj->gpuaddr = gpuaddr;
+			if (end1 > end2)
+				obj->size = end1 - obj->gpuaddr;
+			else
+				obj->size = end2 - obj->gpuaddr;
+			obj->offset = obj->gpuaddr - entry->memdesc.gpuaddr;
+			ret = 0;
+			goto err_put;
+		}
+	}
+
+	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+
+	if (obj == NULL)
+		goto err_put;
+
+	obj->type = type;
+	obj->entry = entry;
+	obj->gpuaddr = gpuaddr;
+	obj->size = size;
+	obj->offset = offset;
+
+	list_add(&obj->node, &snapshot->obj_list);
+
+	/*
+	 * Return the size of the entire mem entry that was frozen - this gets
+	 * used for tracking how much memory is frozen for a hang.  Also, mark
+	 * the memory entry as frozen. If the entry was already marked as
+	 * frozen, then another buffer already got to it.  In that case, return
+	 * 0 so it doesn't get counted twice
+	 */
+
+	ret = (entry->memdesc.priv & KGSL_MEMDESC_FROZEN) ? 0
+		: entry->memdesc.size;
+
+	entry->memdesc.priv |= KGSL_MEMDESC_FROZEN;
+
+	return ret;
+err_put:
+	kgsl_mem_entry_put(entry);
+	return ret;
+}
+EXPORT_SYMBOL(kgsl_snapshot_get_object);
+
+/**
+ * kgsl_snapshot_dump_registers - helper function to dump device registers
+ * @device: the device to dump registers from
+ * @buf: pointer to the start of the region of memory for the section
+ * @remain: number of bytes remaining in the snapshot region
+ * @priv: pointer to the kgsl_snapshot_registers data
+ *
+ * Given an array of register range pairs (start, end [inclusive]), dump the
+ * registers into a snapshot register section.  The section stores a
+ * pair of dwords for each register - the word address of the register and
+ * the value.
+ */
+size_t kgsl_snapshot_dump_registers(struct kgsl_device *device, u8 *buf,
+	size_t remain, void *priv)
+{
+	struct kgsl_snapshot_regs *header = (struct kgsl_snapshot_regs *)buf;
+	struct kgsl_snapshot_registers *regs = priv;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	int count = 0, j, k;
+
+	/* Figure out how many registers we are going to dump */
+
+	for (j = 0; j < regs->count; j++) {
+		int start = regs->regs[j * 2];
+		int end = regs->regs[j * 2 + 1];
+
+		count += (end - start + 1);
+	}
+
+	if (remain < (count * 8) + sizeof(*header)) {
+		SNAPSHOT_ERR_NOMEM(device, "REGISTERS");
+		return 0;
+	}
+
+	for (j = 0; j < regs->count; j++) {
+		unsigned int start = regs->regs[j * 2];
+		unsigned int end = regs->regs[j * 2 + 1];
+
+		for (k = start; k <= end; k++) {
+			unsigned int val;
+
+			kgsl_regread(device, k, &val);
+			*data++ = k;
+			*data++ = val;
+		}
+	}
+
+	header->count = count;
+
+	/* Return the size of the section */
+	return (count * 8) + sizeof(*header);
+}
+EXPORT_SYMBOL(kgsl_snapshot_dump_registers);
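+
+/*
+ * Illustrative use from a device snapshot callback (sketch only - the
+ * register offsets below are made up, not real Adreno registers):
+ *
+ *	unsigned int ranges[] = { 0x0000, 0x002f, 0x0400, 0x041f };
+ *	struct kgsl_snapshot_registers regs = {
+ *		.regs = ranges,
+ *		.count = ARRAY_SIZE(ranges) / 2,
+ *	};
+ *
+ *	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_REGS,
+ *		snapshot, kgsl_snapshot_dump_registers, &regs);
+ */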
+
+struct kgsl_snapshot_indexed_registers {
+	unsigned int index;
+	unsigned int data;
+	unsigned int start;
+	unsigned int count;
+};
+
+static size_t kgsl_snapshot_dump_indexed_regs(struct kgsl_device *device,
+	u8 *buf, size_t remain, void *priv)
+{
+	struct kgsl_snapshot_indexed_registers *iregs = priv;
+	struct kgsl_snapshot_indexed_regs *header =
+		(struct kgsl_snapshot_indexed_regs *)buf;
+	unsigned int *data = (unsigned int *)(buf + sizeof(*header));
+	int i;
+
+	if (remain < (iregs->count * 4) + sizeof(*header)) {
+		SNAPSHOT_ERR_NOMEM(device, "INDEXED REGS");
+		return 0;
+	}
+
+	header->index_reg = iregs->index;
+	header->data_reg = iregs->data;
+	header->count = iregs->count;
+	header->start = iregs->start;
+
+	for (i = 0; i < iregs->count; i++) {
+		kgsl_regwrite(device, iregs->index, iregs->start + i);
+		kgsl_regread(device, iregs->data, &data[i]);
+	}
+
+	return (iregs->count * 4) + sizeof(*header);
+}
+
+/**
+ * kgsl_snapshot_indexed_registers - Add a set of indexed registers to the
+ * snapshot
+ * @device: Pointer to the KGSL device being snapshotted
+ * @snapshot: Snapshot instance
+ * @index: Offset for the index register
+ * @data: Offset for the data register
+ * @start: Index to start reading
+ * @count: Number of entries to read
+ *
+ * Dump the values from an indexed register group into the snapshot
+ */
+void kgsl_snapshot_indexed_registers(struct kgsl_device *device,
+		struct kgsl_snapshot *snapshot,
+		unsigned int index, unsigned int data,
+		unsigned int start,
+		unsigned int count)
+{
+	struct kgsl_snapshot_indexed_registers iregs;
+
+	iregs.index = index;
+	iregs.data = data;
+	iregs.start = start;
+	iregs.count = count;
+
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_INDEXED_REGS,
+		snapshot, kgsl_snapshot_dump_indexed_regs, &iregs);
+}
+EXPORT_SYMBOL(kgsl_snapshot_indexed_registers);
+
+/**
+ * kgsl_snapshot_add_section() - Add a new section to the GPU snapshot
+ * @device: the KGSL device being snapshotted
+ * @id: the section id
+ * @snapshot: pointer to the snapshot instance
+ * @func:  Function pointer to fill the section
+ * @priv: Private pointer to pass to the function
+ *
+ * Fill the section data by calling the provided callback function and add
+ * the standard section header in front of it
+ */
+void kgsl_snapshot_add_section(struct kgsl_device *device, u16 id,
+	struct kgsl_snapshot *snapshot,
+	size_t (*func)(struct kgsl_device *, u8 *, size_t, void *),
+	void *priv)
+{
+	struct kgsl_snapshot_section_header *header =
+		(struct kgsl_snapshot_section_header *)snapshot->ptr;
+	u8 *data = snapshot->ptr + sizeof(*header);
+	size_t ret = 0;
+
+	/*
+	 * Sanity check to make sure there is enough for the header.  The
+	 * callback will check to make sure there is enough for the rest
+	 * of the data.  If there isn't enough room then don't advance the
+	 * pointer.
+	 */
+
+	if (snapshot->remain < sizeof(*header))
+		return;
+
+	/* It is legal to have no function (i.e. - make an empty section) */
+	if (func) {
+		ret = func(device, data, snapshot->remain - sizeof(*header),
+			priv);
+
+		/*
+		 * If there wasn't enough room for the data then don't bother
+		 * setting up the header.
+		 */
+
+		if (ret == 0)
+			return;
+	}
+
+	header->magic = SNAPSHOT_SECTION_MAGIC;
+	header->id = id;
+	header->size = ret + sizeof(*header);
+
+	snapshot->ptr += header->size;
+	snapshot->remain -= header->size;
+	snapshot->size += header->size;
+}
+
+/**
+ * kgsl_device_snapshot() - construct a device snapshot
+ * @device: device to snapshot
+ * @context: the context that is hung, might be NULL if unknown.
+ *
+ * Given a device, construct a binary snapshot dump of the current device state
+ * and store it in the device snapshot memory.
+ */
+void kgsl_device_snapshot(struct kgsl_device *device,
+		struct kgsl_context *context)
+{
+	struct kgsl_snapshot_header *header = device->snapshot_memory.ptr;
+	struct kgsl_snapshot *snapshot;
+	struct timespec boot;
+	phys_addr_t pa;
+
+	if (device->snapshot_memory.ptr == NULL) {
+		KGSL_DRV_ERR(device,
+			"snapshot: no snapshot memory available\n");
+		return;
+	}
+
+	if (WARN(!kgsl_state_is_awake(device),
+		"snapshot: device is powered off\n"))
+		return;
+
+	/* increment the hang count for good bookkeeping */
+	device->snapshot_faultcount++;
+
+	/*
+	 * The first hang is always the one we are interested in. Don't capture
+	 * a new snapshot instance if the old one hasn't been grabbed yet
+	 */
+	if (device->snapshot != NULL)
+		return;
+
+	/* Allocate memory for the snapshot instance */
+	snapshot = kzalloc(sizeof(*snapshot), GFP_KERNEL);
+	if (snapshot == NULL)
+		return;
+
+	init_completion(&snapshot->dump_gate);
+	INIT_LIST_HEAD(&snapshot->obj_list);
+	INIT_LIST_HEAD(&snapshot->cp_list);
+	INIT_WORK(&snapshot->work, kgsl_snapshot_save_frozen_objs);
+
+	snapshot->start = device->snapshot_memory.ptr;
+	snapshot->ptr = device->snapshot_memory.ptr;
+	snapshot->remain = device->snapshot_memory.size;
+	atomic_set(&snapshot->sysfs_read, 0);
+
+	header = (struct kgsl_snapshot_header *) snapshot->ptr;
+
+	header->magic = SNAPSHOT_MAGIC;
+	header->gpuid = kgsl_gpuid(device, &header->chipid);
+
+	snapshot->ptr += sizeof(*header);
+	snapshot->remain -= sizeof(*header);
+	snapshot->size += sizeof(*header);
+
+	/* Build the Linux specific header */
+	kgsl_snapshot_add_section(device, KGSL_SNAPSHOT_SECTION_OS,
+			snapshot, snapshot_os, NULL);
+
+	/* Get the device specific sections */
+	if (device->ftbl->snapshot)
+		device->ftbl->snapshot(device, snapshot, context);
+
+	/*
+	 * The timestamp is the seconds since boot so it is easier to match to
+	 * the kernel log
+	 */
+
+	getboottime(&boot);
+	snapshot->timestamp = get_seconds() - boot.tv_sec;
+
+	/* Store the instance in the device until it gets dumped */
+	device->snapshot = snapshot;
+
+	/* log buffer info to aid in ramdump fault tolerance */
+	pa = __pa(device->snapshot_memory.ptr);
+	KGSL_DRV_ERR(device, "snapshot created at pa %pa size %zd\n",
+			&pa, snapshot->size);
+
+	sysfs_notify(&device->snapshot_kobj, NULL, "timestamp");
+
+	/*
+	 * Queue a work item that will save the IB data in the snapshot into
+	 * static memory to prevent loss of data due to overwriting of
+	 * memory.
+	 */
+	kgsl_schedule_work(&snapshot->work);
+}
+EXPORT_SYMBOL(kgsl_device_snapshot);
+
+/* An attribute for showing snapshot details */
+struct kgsl_snapshot_attribute {
+	struct attribute attr;
+	ssize_t (*show)(struct kgsl_device *device, char *buf);
+	ssize_t (*store)(struct kgsl_device *device, const char *buf,
+		size_t count);
+};
+
+/**
+ * kgsl_snapshot_process_ib_obj_list() - Go through the list of IBs which need
+ * to be dumped for snapshot and move them to the global snapshot list so
+ * they will get dumped when the global list is dumped
+ * @snapshot: the snapshot whose IB object list is processed
+ */
+static void kgsl_snapshot_process_ib_obj_list(struct kgsl_snapshot *snapshot)
+{
+	struct kgsl_snapshot_cp_obj *obj, *obj_temp;
+	struct adreno_ib_object *ib_obj;
+	int i;
+
+	list_for_each_entry_safe(obj, obj_temp, &snapshot->cp_list,
+			node) {
+		for (i = 0; i < obj->ib_obj_list->num_objs; i++) {
+			ib_obj = &(obj->ib_obj_list->obj_list[i]);
+			kgsl_snapshot_get_object(snapshot, ib_obj->entry->priv,
+				ib_obj->gpuaddr, ib_obj->size,
+				ib_obj->snapshot_obj_type);
+		}
+		list_del(&obj->node);
+		adreno_ib_destroy_obj_list(obj->ib_obj_list);
+		kfree(obj);
+	}
+}
+
+#define to_snapshot_attr(a) \
+container_of(a, struct kgsl_snapshot_attribute, attr)
+
+#define kobj_to_device(a) \
+container_of(a, struct kgsl_device, snapshot_kobj)
+
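+/*
+ * Userspace reads the dump in chunks through the "dump" bin attribute; the
+ * snapshot and its frozen objects are only released once a read returns no
+ * bytes, i.e. the whole dump (including the mempool and the END section)
+ * has been copied out and no other readers are pending.
+ */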
+/* Dump the sysfs binary data to the user */
+static ssize_t snapshot_show(struct file *filep, struct kobject *kobj,
+	struct bin_attribute *attr, char *buf, loff_t off,
+	size_t count)
+{
+	struct kgsl_device *device = kobj_to_device(kobj);
+	struct kgsl_snapshot *snapshot;
+	struct kgsl_snapshot_object *obj, *tmp;
+	struct kgsl_snapshot_section_header head;
+	struct snapshot_obj_itr itr;
+	int ret;
+
+	if (device == NULL)
+		return 0;
+
+	mutex_lock(&device->mutex);
+	snapshot = device->snapshot;
+	if (snapshot != NULL)
+		atomic_inc(&snapshot->sysfs_read);
+	mutex_unlock(&device->mutex);
+
+	/* Return nothing if we haven't taken a snapshot yet */
+	if (snapshot == NULL)
+		return 0;
+
+	/*
+	 * Wait for the dump worker to finish. This is interruptible
+	 * to allow userspace to bail if things go horribly wrong.
+	 */
+	ret = wait_for_completion_interruptible(&snapshot->dump_gate);
+	if (ret) {
+		atomic_dec(&snapshot->sysfs_read);
+		return ret;
+	}
+
+	obj_itr_init(&itr, buf, off, count);
+
+	ret = obj_itr_out(&itr, snapshot->start, snapshot->size);
+	if (ret == 0)
+		goto done;
+
+	/* Dump the memory pool if it exists */
+	if (snapshot->mempool) {
+		ret = obj_itr_out(&itr, snapshot->mempool,
+				snapshot->mempool_size);
+		if (ret == 0)
+			goto done;
+	}
+
+	{
+		head.magic = SNAPSHOT_SECTION_MAGIC;
+		head.id = KGSL_SNAPSHOT_SECTION_END;
+		head.size = sizeof(head);
+
+		obj_itr_out(&itr, &head, sizeof(head));
+	}
+
+	/*
+	 * Make sure everything has been written out before destroying things.
+	 * The best way to confirm this is to go all the way through without
+	 * writing any bytes - so only release if we get this far and
+	 * itr->write is 0 and there are no concurrent reads pending
+	 */
+
+	if (itr.write == 0) {
+		bool snapshot_free = false;
+
+		mutex_lock(&device->mutex);
+		if (atomic_dec_and_test(&snapshot->sysfs_read)) {
+			device->snapshot = NULL;
+			snapshot_free = true;
+		}
+		mutex_unlock(&device->mutex);
+
+		if (snapshot_free) {
+			list_for_each_entry_safe(obj, tmp,
+						&snapshot->obj_list, node)
+				kgsl_snapshot_put_object(obj);
+
+			if (snapshot->mempool)
+				vfree(snapshot->mempool);
+
+			kfree(snapshot);
+			KGSL_CORE_ERR("snapshot: objects released\n");
+		}
+		return 0;
+	}
+
+done:
+	atomic_dec(&snapshot->sysfs_read);
+	return itr.write;
+}
+
+/* Show the total number of hangs since device boot */
+static ssize_t faultcount_show(struct kgsl_device *device, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%d\n", device->snapshot_faultcount);
+}
+
+/* Reset the total number of hangs since device boot */
+static ssize_t faultcount_store(struct kgsl_device *device, const char *buf,
+	size_t count)
+{
+	if (device && count > 0)
+		device->snapshot_faultcount = 0;
+
+	return count;
+}
+
+/* Show the force_panic request status */
+static ssize_t force_panic_show(struct kgsl_device *device, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%d\n", device->force_panic);
+}
+
+/* Store the panic request value to force_panic */
+static ssize_t force_panic_store(struct kgsl_device *device, const char *buf,
+	size_t count)
+{
+	unsigned int val = 0;
+	int ret;
+
+	if (device && count > 0)
+		device->force_panic = 0;
+
+	ret = kgsl_sysfs_store(buf, &val);
+
+	if (!ret && device)
+		device->force_panic = (bool)val;
+
+	return (ssize_t) ret < 0 ? ret : count;
+}
+
+/* Show the snapshot_crashdumper request status */
+static ssize_t snapshot_crashdumper_show(struct kgsl_device *device, char *buf)
+{
+	return snprintf(buf, PAGE_SIZE, "%d\n", device->snapshot_crashdumper);
+}
+
+/* Store the value to snapshot_crashdumper */
+static ssize_t snapshot_crashdumper_store(struct kgsl_device *device,
+	const char *buf, size_t count)
+{
+	unsigned int val = 0;
+	int ret;
+
+	if (device && count > 0)
+		device->snapshot_crashdumper = 1;
+
+	ret = kgsl_sysfs_store(buf, &val);
+
+	if (!ret && device)
+		device->snapshot_crashdumper = (bool)val;
+
+	return (ssize_t) ret < 0 ? ret : count;
+}
+
+/* Show the timestamp of the last collected snapshot */
+static ssize_t timestamp_show(struct kgsl_device *device, char *buf)
+{
+	unsigned long timestamp =
+		device->snapshot ? device->snapshot->timestamp : 0;
+
+	return snprintf(buf, PAGE_SIZE, "%lu\n", timestamp);
+}
+
+static struct bin_attribute snapshot_attr = {
+	.attr.name = "dump",
+	.attr.mode = 0444,
+	.size = 0,
+	.read = snapshot_show
+};
+
+#define SNAPSHOT_ATTR(_name, _mode, _show, _store) \
+struct kgsl_snapshot_attribute attr_##_name = { \
+	.attr = { .name = __stringify(_name), .mode = _mode }, \
+	.show = _show, \
+	.store = _store, \
+}
+
+static SNAPSHOT_ATTR(timestamp, 0444, timestamp_show, NULL);
+static SNAPSHOT_ATTR(faultcount, 0644, faultcount_show, faultcount_store);
+static SNAPSHOT_ATTR(force_panic, 0644, force_panic_show, force_panic_store);
+static SNAPSHOT_ATTR(snapshot_crashdumper, 0644, snapshot_crashdumper_show,
+	snapshot_crashdumper_store);
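+
+/*
+ * Together with the "dump" bin attribute these show up under the device's
+ * snapshot kobject, typically /sys/class/kgsl/kgsl-3d0/snapshot/ on a
+ * single-GPU target: dump, timestamp, faultcount, force_panic and
+ * snapshot_crashdumper.
+ */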
+
+static ssize_t snapshot_sysfs_show(struct kobject *kobj,
+	struct attribute *attr, char *buf)
+{
+	struct kgsl_snapshot_attribute *pattr = to_snapshot_attr(attr);
+	struct kgsl_device *device = kobj_to_device(kobj);
+	ssize_t ret;
+
+	if (device && pattr->show)
+		ret = pattr->show(device, buf);
+	else
+		ret = -EIO;
+
+	return ret;
+}
+
+static ssize_t snapshot_sysfs_store(struct kobject *kobj,
+	struct attribute *attr, const char *buf, size_t count)
+{
+	struct kgsl_snapshot_attribute *pattr = to_snapshot_attr(attr);
+	struct kgsl_device *device = kobj_to_device(kobj);
+	ssize_t ret;
+
+	if (device && pattr->store)
+		ret = pattr->store(device, buf, count);
+	else
+		ret = -EIO;
+
+	return ret;
+}
+
+static const struct sysfs_ops snapshot_sysfs_ops = {
+	.show = snapshot_sysfs_show,
+	.store = snapshot_sysfs_store,
+};
+
+static struct kobj_type ktype_snapshot = {
+	.sysfs_ops = &snapshot_sysfs_ops,
+};
+
+/**
+ * kgsl_device_snapshot_init() - add resources for the device GPU snapshot
+ * @device: The device to initialize
+ *
+ * Allocate memory for a GPU snapshot for the specified device,
+ * and create the sysfs files to manage it
+ */
+int kgsl_device_snapshot_init(struct kgsl_device *device)
+{
+	int ret;
+
+	if (kgsl_property_read_u32(device, "qcom,snapshot-size",
+		(unsigned int *) &(device->snapshot_memory.size)))
+		device->snapshot_memory.size = KGSL_SNAPSHOT_MEMSIZE;
+
+	/*
+	 * Choosing a memory size of 0 is essentially the same as disabling
+	 * snapshotting
+	 */
+	if (device->snapshot_memory.size == 0)
+		return 0;
+
+	/*
+	 * I'm not sure why anybody would choose to do so but make sure
+	 * that we can at least fit the snapshot header in the requested
+	 * region
+	 */
+	if (device->snapshot_memory.size < sizeof(struct kgsl_snapshot_header))
+		device->snapshot_memory.size =
+			sizeof(struct kgsl_snapshot_header);
+
+	device->snapshot_memory.ptr = kzalloc(device->snapshot_memory.size,
+		GFP_KERNEL);
+
+	if (device->snapshot_memory.ptr == NULL)
+		return -ENOMEM;
+
+	device->snapshot = NULL;
+	device->snapshot_faultcount = 0;
+	device->force_panic = 0;
+	device->snapshot_crashdumper = 1;
+
+	ret = kobject_init_and_add(&device->snapshot_kobj, &ktype_snapshot,
+		&device->dev->kobj, "snapshot");
+	if (ret)
+		goto done;
+
+	ret = sysfs_create_bin_file(&device->snapshot_kobj, &snapshot_attr);
+	if (ret)
+		goto done;
+
+	ret  = sysfs_create_file(&device->snapshot_kobj, &attr_timestamp.attr);
+	if (ret)
+		goto done;
+
+	ret  = sysfs_create_file(&device->snapshot_kobj, &attr_faultcount.attr);
+	if (ret)
+		goto done;
+
+	ret  = sysfs_create_file(&device->snapshot_kobj,
+			&attr_force_panic.attr);
+	if (ret)
+		goto done;
+
+	ret  = sysfs_create_file(&device->snapshot_kobj,
+			&attr_snapshot_crashdumper.attr);
+done:
+	return ret;
+}
+EXPORT_SYMBOL(kgsl_device_snapshot_init);
+
+/**
+ * kgsl_device_snapshot_close() - take down snapshot memory for a device
+ * @device: Pointer to the kgsl_device
+ *
+ * Remove the sysfs files and free the memory allocated for the GPU
+ * snapshot
+ */
+void kgsl_device_snapshot_close(struct kgsl_device *device)
+{
+	sysfs_remove_bin_file(&device->snapshot_kobj, &snapshot_attr);
+	sysfs_remove_file(&device->snapshot_kobj, &attr_timestamp.attr);
+
+	kobject_put(&device->snapshot_kobj);
+
+	kfree(device->snapshot_memory.ptr);
+
+	device->snapshot_memory.ptr = NULL;
+	device->snapshot_memory.size = 0;
+	device->snapshot_faultcount = 0;
+	device->force_panic = 0;
+	device->snapshot_crashdumper = 1;
+}
+EXPORT_SYMBOL(kgsl_device_snapshot_close);
+
+/**
+ * kgsl_snapshot_add_ib_obj_list() - Add an IB object list to the snapshot
+ * object list
+ * @snapshot: the snapshot that is being built
+ * @ib_obj_list: The IB list that has objects required to execute an IB
+ *
+ * Adds a new IB to the list of IB objects maintained while getting a snapshot
+ * Returns 0 on success else -ENOMEM on error
+ */
+int kgsl_snapshot_add_ib_obj_list(struct kgsl_snapshot *snapshot,
+	struct adreno_ib_object_list *ib_obj_list)
+{
+	struct kgsl_snapshot_cp_obj *obj;
+
+	obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+	if (!obj)
+		return -ENOMEM;
+	obj->ib_obj_list = ib_obj_list;
+	list_add(&obj->node, &snapshot->cp_list);
+	return 0;
+}
+
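+/*
+ * Copy one frozen GPU object into the snapshot mempool as a
+ * GPU_OBJECT_V2 section, noting whether the active IB1/IB2 buffers were
+ * covered.  Returns the number of bytes written, or 0 if the object could
+ * not be mapped into the kernel.
+ */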
+static size_t _mempool_add_object(struct kgsl_snapshot *snapshot, u8 *data,
+		struct kgsl_snapshot_object *obj)
+{
+	struct kgsl_snapshot_section_header *section =
+		(struct kgsl_snapshot_section_header *)data;
+	struct kgsl_snapshot_gpu_object_v2 *header =
+		(struct kgsl_snapshot_gpu_object_v2 *)(data + sizeof(*section));
+	u8 *dest = data + sizeof(*section) + sizeof(*header);
+	uint64_t size;
+
+	size = obj->size;
+
+	if (!kgsl_memdesc_map(&obj->entry->memdesc)) {
+		KGSL_CORE_ERR("snapshot: failed to map GPU object\n");
+		return 0;
+	}
+
+	section->magic = SNAPSHOT_SECTION_MAGIC;
+	section->id = KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2;
+	section->size = size + sizeof(*header) + sizeof(*section);
+
+	header->size = size >> 2;
+	header->gpuaddr = obj->gpuaddr;
+	header->ptbase =
+		kgsl_mmu_pagetable_get_ttbr0(obj->entry->priv->pagetable);
+	header->type = obj->type;
+
+	if (kgsl_addr_range_overlap(obj->gpuaddr, obj->size,
+				snapshot->ib1base, snapshot->ib1size))
+		snapshot->ib1dumped = true;
+
+	if (kgsl_addr_range_overlap(obj->gpuaddr, obj->size,
+				snapshot->ib2base, snapshot->ib2size))
+		snapshot->ib2dumped = true;
+
+	memcpy(dest, obj->entry->memdesc.hostptr + obj->offset, size);
+	kgsl_memdesc_unmap(&obj->entry->memdesc);
+
+	return section->size;
+}
+
+/**
+ * kgsl_snapshot_save_frozen_objs() - Save the objects frozen in snapshot into
+ * memory so that the data reported in these objects is correct when snapshot
+ * is taken
+ * @work: The work item that scheduled this work
+ */
+void kgsl_snapshot_save_frozen_objs(struct work_struct *work)
+{
+	struct kgsl_snapshot *snapshot = container_of(work,
+				struct kgsl_snapshot, work);
+	struct kgsl_device *device = kgsl_get_device(KGSL_DEVICE_3D0);
+	struct kgsl_snapshot_object *obj, *tmp;
+	size_t size = 0;
+	void *ptr;
+
+	if (IS_ERR_OR_NULL(device))
+		return;
+
+	kgsl_snapshot_process_ib_obj_list(snapshot);
+
+	list_for_each_entry(obj, &snapshot->obj_list, node) {
+		obj->size = ALIGN(obj->size, 4);
+
+		size += ((size_t) obj->size +
+			sizeof(struct kgsl_snapshot_gpu_object_v2) +
+			sizeof(struct kgsl_snapshot_section_header));
+	}
+
+	if (size == 0)
+		goto done;
+
+	snapshot->mempool = vmalloc(size);
+
+	ptr = snapshot->mempool;
+	snapshot->mempool_size = 0;
+
+	/* even if vmalloc fails, make sure we clean up the obj_list */
+	list_for_each_entry_safe(obj, tmp, &snapshot->obj_list, node) {
+		if (snapshot->mempool) {
+			size_t ret = _mempool_add_object(snapshot, ptr, obj);
+
+			ptr += ret;
+			snapshot->mempool_size += ret;
+		}
+
+		kgsl_snapshot_put_object(obj);
+	}
+done:
+	/*
+	 * Get rid of the process struct here, so that it doesn't sit
+	 * around until someone bothers to read the snapshot file.
+	 */
+	kgsl_process_private_put(snapshot->process);
+	snapshot->process = NULL;
+
+	if (snapshot->ib1base && !snapshot->ib1dumped)
+		KGSL_DRV_ERR(device,
+				"snapshot: Active IB1:%016llx not dumped\n",
+				snapshot->ib1base);
+	else if (snapshot->ib2base && !snapshot->ib2dumped)
+		KGSL_DRV_ERR(device,
+			       "snapshot: Active IB2:%016llx not dumped\n",
+				snapshot->ib2base);
+
+	complete_all(&snapshot->dump_gate);
+	BUG_ON(device->force_panic);
+}
diff --git a/drivers/gpu/msm/kgsl_snapshot.h b/drivers/gpu/msm/kgsl_snapshot.h
new file mode 100644
index 0000000..e2ded87
--- /dev/null
+++ b/drivers/gpu/msm/kgsl_snapshot.h
@@ -0,0 +1,258 @@
+/* Copyright (c) 2012-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef _KGSL_SNAPSHOT_H_
+#define _KGSL_SNAPSHOT_H_
+
+#include <linux/types.h>
+
+/* Snapshot header */
+
+/* High word is static, low word is snapshot version ID */
+#define SNAPSHOT_MAGIC 0x504D0002
+
+/* GPU ID scheme:
+ * [16:31] - core identifier (0x0002 for 2D or 0x0003 for 3D)
+ * [00:15] - GPU specific identifier
+ */
+
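+/*
+ * Illustrative decode of the scheme above (not part of the dump format):
+ *	core = (gpuid >> 16) & 0xffff;	0x0002 for 2D, 0x0003 for 3D
+ *	id   = gpuid & 0xffff;		GPU specific identifier
+ */
+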
+struct kgsl_snapshot_header {
+	__u32 magic; /* Magic identifier */
+	__u32 gpuid; /* GPU ID - see above */
+	/* Added in snapshot version 2 */
+	__u32 chipid; /* Chip ID from the GPU */
+} __packed;
+
+/* Section header */
+#define SNAPSHOT_SECTION_MAGIC 0xABCD
+
+struct kgsl_snapshot_section_header {
+	__u16 magic; /* Magic identifier */
+	__u16 id;    /* Type of section */
+	__u32 size;  /* Size of the section including this header */
+} __packed;
+
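+/*
+ * A snapshot dump is the kgsl_snapshot_header followed by a sequence of
+ * sections, each led by this header, and is terminated by a header-only
+ * KGSL_SNAPSHOT_SECTION_END section.
+ */
+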
+/* Section identifiers */
+#define KGSL_SNAPSHOT_SECTION_OS           0x0101
+#define KGSL_SNAPSHOT_SECTION_REGS         0x0201
+#define KGSL_SNAPSHOT_SECTION_RB           0x0301
+#define KGSL_SNAPSHOT_SECTION_RB_V2        0x0302
+#define KGSL_SNAPSHOT_SECTION_IB           0x0401
+#define KGSL_SNAPSHOT_SECTION_IB_V2        0x0402
+#define KGSL_SNAPSHOT_SECTION_INDEXED_REGS 0x0501
+#define KGSL_SNAPSHOT_SECTION_ISTORE       0x0801
+#define KGSL_SNAPSHOT_SECTION_DEBUG        0x0901
+#define KGSL_SNAPSHOT_SECTION_DEBUGBUS     0x0A01
+#define KGSL_SNAPSHOT_SECTION_GPU_OBJECT   0x0B01
+#define KGSL_SNAPSHOT_SECTION_GPU_OBJECT_V2 0x0B02
+#define KGSL_SNAPSHOT_SECTION_MEMLIST      0x0E01
+#define KGSL_SNAPSHOT_SECTION_MEMLIST_V2   0x0E02
+#define KGSL_SNAPSHOT_SECTION_SHADER       0x1201
+
+#define KGSL_SNAPSHOT_SECTION_END          0xFFFF
+
+/* OS sub-section header */
+#define KGSL_SNAPSHOT_OS_LINUX             0x0001
+#define KGSL_SNAPSHOT_OS_LINUX_V3          0x00000202
+
+/* Linux OS specific information */
+struct kgsl_snapshot_linux {
+	int osid;                   /* subsection OS identifier */
+	int state;		    /* 1 if the thread is running, 0 for hung */
+	__u32 seconds;		    /* Unix timestamp for the snapshot */
+	__u32 power_flags;            /* Current power flags */
+	__u32 power_level;            /* Current power level */
+	__u32 power_interval_timeout; /* Power interval timeout */
+	__u32 grpclk;                 /* Current GP clock value */
+	__u32 busclk;		    /* Current busclk value */
+	__u32 ptbase;		    /* Current ptbase */
+	__u32 pid;		    /* PID of the process that owns the PT */
+	__u32 current_context;	    /* ID of the current context */
+	__u32 ctxtcount;	    /* Number of contexts appended to section */
+	unsigned char release[32];  /* kernel release */
+	unsigned char version[32];  /* kernel version */
+	unsigned char comm[16];	    /* Name of the process that owns the PT */
+} __packed;
+
+struct kgsl_snapshot_linux_v2 {
+	int osid;                   /* subsection OS identifier */
+	__u32 seconds;		    /* Unix timestamp for the snapshot */
+	__u32 power_flags;            /* Current power flags */
+	__u32 power_level;            /* Current power level */
+	__u32 power_interval_timeout; /* Power interval timeout */
+	__u32 grpclk;                 /* Current GP clock value */
+	__u32 busclk;		    /* Current busclk value */
+	__u64 ptbase;		    /* Current ptbase */
+	__u32 pid;		    /* PID of the process that owns the PT */
+	__u32 current_context;	    /* ID of the current context */
+	__u32 ctxtcount;	    /* Number of contexts appended to section */
+	unsigned char release[32];  /* kernel release */
+	unsigned char version[32];  /* kernel version */
+	unsigned char comm[16];	    /* Name of the process that owns the PT */
+} __packed;
+
+/*
+ * This structure contains a record of an active context.
+ * These are appended one after another in the OS section below
+ * the header above
+ */
+
+struct kgsl_snapshot_linux_context {
+	__u32 id;			/* The context ID */
+	__u32 timestamp_queued;		/* The last queued timestamp */
+	__u32 timestamp_retired;	/* The last timestamp retired by HW */
+};
+
+struct kgsl_snapshot_linux_context_v2 {
+	__u32 id;			/* The context ID */
+	__u32 timestamp_queued;		/* The last queued timestamp */
+	__u32 timestamp_consumed;	/* The last timestamp consumed by HW */
+	__u32 timestamp_retired;	/* The last timestamp retired by HW */
+};
+/* Ringbuffer sub-section header */
+struct kgsl_snapshot_rb {
+	int start;  /* dword at the start of the dump */
+	int end;    /* dword at the end of the dump */
+	int rbsize; /* Size (in dwords) of the ringbuffer */
+	int wptr;   /* Current index of the CPU write pointer */
+	int rptr;   /* Current index of the GPU read pointer */
+	int count;  /* Number of dwords in the dump */
+	__u32 timestamp_queued; /* The last queued timestamp */
+	__u32 timestamp_retired; /* The last timestamp retired by HW */
+} __packed;
+
+struct kgsl_snapshot_rb_v2 {
+	int start;  /* dword at the start of the dump */
+	int end;    /* dword at the end of the dump */
+	int rbsize; /* Size (in dwords) of the ringbuffer */
+	int wptr;   /* Current index of the CPU write pointer */
+	int rptr;   /* Current index of the GPU read pointer */
+	int count;  /* Number of dwords in the dump */
+	__u32 timestamp_queued; /* The last queued timestamp */
+	__u32 timestamp_retired; /* The last timestamp retired by HW */
+	__u64 gpuaddr; /* The GPU address of the ringbuffer */
+	__u32 id; /* Ringbuffer identifier */
+} __packed;
+
+
+/* Replay or Memory list section, both sections have same header */
+struct kgsl_snapshot_replay_mem_list {
+	/*
+	 * Number of IBs to replay for replay section or
+	 * number of memory list entries for mem list section
+	 */
+	int num_entries;
+	/* Pagetable base to which the replay IBs or memory entries belong */
+	__u32 ptbase;
+} __packed;
+
+/* Replay or Memory list section, both sections have same header */
+struct kgsl_snapshot_mem_list_v2 {
+	/*
+	 * Number of IBs to replay for replay section or
+	 * number of memory list entries for mem list section
+	 */
+	int num_entries;
+	/* Pagetable base to which the replay IBs or memory entries belong */
+	__u64 ptbase;
+} __packed;
+
+
+/* Indirect buffer sub-section header */
+struct kgsl_snapshot_ib {
+	__u32 gpuaddr; /* GPU address of the IB */
+	__u32 ptbase;  /* Base for the pagetable the GPU address is valid in */
+	int size;    /* Size of the IB */
+} __packed;
+
+/* Indirect buffer sub-section header (v2) */
+struct kgsl_snapshot_ib_v2 {
+	__u64 gpuaddr; /* GPU address of the IB */
+	__u64 ptbase;  /* Base for the pagetable the GPU address is valid in */
+	__u64 size;    /* Size of the IB */
+} __packed;
+
+
+/* Register sub-section header */
+struct kgsl_snapshot_regs {
+	__u32 count; /* Number of register pairs in the section */
+} __packed;
+
+/* Indexed register sub-section header */
+struct kgsl_snapshot_indexed_regs {
+	__u32 index_reg; /* Offset of the index register for this section */
+	__u32 data_reg;  /* Offset of the data register for this section */
+	int start;     /* Starting index */
+	int count;     /* Number of dwords in the data */
+} __packed;
+
+/* Istore sub-section header */
+struct kgsl_snapshot_istore {
+	int count;   /* Number of instructions in the istore */
+} __packed;
+
+/* Debug data sub-section header */
+
+/* A2XX debug sections */
+#define SNAPSHOT_DEBUG_SX         1
+#define SNAPSHOT_DEBUG_CP         2
+#define SNAPSHOT_DEBUG_SQ         3
+#define SNAPSHOT_DEBUG_SQTHREAD   4
+#define SNAPSHOT_DEBUG_MIU        5
+
+/* A3XX debug sections */
+#define SNAPSHOT_DEBUG_VPC_MEMORY 6
+#define SNAPSHOT_DEBUG_CP_MEQ     7
+#define SNAPSHOT_DEBUG_CP_PM4_RAM 8
+#define SNAPSHOT_DEBUG_CP_PFP_RAM 9
+#define SNAPSHOT_DEBUG_CP_ROQ     10
+#define SNAPSHOT_DEBUG_SHADER_MEMORY 11
+#define SNAPSHOT_DEBUG_CP_MERCIU 12
+
+struct kgsl_snapshot_debug {
+	int type;    /* Type identifier for the attached data */
+	int size;   /* Size of the section in dwords */
+} __packed;
+
+struct kgsl_snapshot_debugbus {
+	int id;	   /* Debug bus ID */
+	int count; /* Number of dwords in the dump */
+} __packed;
+
+struct kgsl_snapshot_shader {
+	int type;  /* SP/TP statetype */
+	int index; /* SP/TP index */
+	int size;  /* Number of dwords in the dump */
+} __packed;
+
+#define SNAPSHOT_GPU_OBJECT_SHADER  1
+#define SNAPSHOT_GPU_OBJECT_IB      2
+#define SNAPSHOT_GPU_OBJECT_GENERIC 3
+#define SNAPSHOT_GPU_OBJECT_DRAW    4
+#define SNAPSHOT_GPU_OBJECT_GLOBAL  5
+
+struct kgsl_snapshot_gpu_object {
+	int type;      /* Type of GPU object */
+	__u32 gpuaddr; /* GPU address of the object */
+	__u32 ptbase;  /* Base for the pagetable the GPU address is valid in */
+	int size;    /* Size of the object (in dwords) */
+};
+
+struct kgsl_snapshot_gpu_object_v2 {
+	int type;      /* Type of GPU object */
+	__u64 gpuaddr; /* GPU address of the object */
+	__u64 ptbase;  /* Base for the pagetable the GPU address is valid in */
+	__u64 size;    /* Size of the object (in dwords) */
+} __packed;
+
+#endif
diff --git a/drivers/gpu/msm/kgsl_sync.c b/drivers/gpu/msm/kgsl_sync.c
new file mode 100644
index 0000000..6752f3b7
--- /dev/null
+++ b/drivers/gpu/msm/kgsl_sync.c
@@ -0,0 +1,646 @@
+/* Copyright (c) 2012-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/err.h>
+#include <linux/file.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+#include <asm/current.h>
+
+#include "kgsl_sync.h"
+
+static void kgsl_sync_timeline_signal(struct sync_timeline *timeline,
+	unsigned int timestamp);
+
+static struct sync_pt *kgsl_sync_pt_create(struct sync_timeline *timeline,
+	struct kgsl_context *context, unsigned int timestamp)
+{
+	struct sync_pt *pt;
+
+	pt = sync_pt_create(timeline, (int) sizeof(struct kgsl_sync_pt));
+	if (pt) {
+		struct kgsl_sync_pt *kpt = (struct kgsl_sync_pt *) pt;
+
+		kpt->context = context;
+		kpt->timestamp = timestamp;
+	}
+	return pt;
+}
+
+/*
+ * This should only be called on sync_pts which have been created but
+ * not added to a fence.
+ */
+static void kgsl_sync_pt_destroy(struct sync_pt *pt)
+{
+	sync_pt_free(pt);
+}
+
+static struct sync_pt *kgsl_sync_pt_dup(struct sync_pt *pt)
+{
+	struct kgsl_sync_pt *kpt = (struct kgsl_sync_pt *) pt;
+
+	return kgsl_sync_pt_create(sync_pt_parent(pt),
+				kpt->context, kpt->timestamp);
+}
+
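+/*
+ * A sync point has signaled once the timeline's last signaled timestamp is
+ * at or past the point's timestamp.
+ */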
+static int kgsl_sync_pt_has_signaled(struct sync_pt *pt)
+{
+	struct kgsl_sync_pt *kpt = (struct kgsl_sync_pt *) pt;
+	struct kgsl_sync_timeline *ktimeline =
+		 (struct kgsl_sync_timeline *) sync_pt_parent(pt);
+	unsigned int ts = kpt->timestamp;
+	int ret = 0;
+
+	spin_lock(&ktimeline->lock);
+	ret = (timestamp_cmp(ktimeline->last_timestamp, ts) >= 0);
+	spin_unlock(&ktimeline->lock);
+
+	return ret;
+}
+
+static int kgsl_sync_pt_compare(struct sync_pt *a, struct sync_pt *b)
+{
+	struct kgsl_sync_pt *kpt_a = (struct kgsl_sync_pt *) a;
+	struct kgsl_sync_pt *kpt_b = (struct kgsl_sync_pt *) b;
+	unsigned int ts_a = kpt_a->timestamp;
+	unsigned int ts_b = kpt_b->timestamp;
+
+	return timestamp_cmp(ts_a, ts_b);
+}
+
+struct kgsl_fence_event_priv {
+	struct kgsl_context *context;
+	unsigned int timestamp;
+};
+
+/**
+ * kgsl_fence_event_cb - Event callback for a fence timestamp event
+ * @device: The KGSL device that expired the timestamp
+ * @group: Pointer to the event group that owns the event
+ * @priv: Private data for the callback
+ * @result: Result of the event (retired or canceled)
+ *
+ * Signal a fence following the expiration of a timestamp
+ */
+
+static void kgsl_fence_event_cb(struct kgsl_device *device,
+		struct kgsl_event_group *group, void *priv, int result)
+{
+	struct kgsl_fence_event_priv *ev = priv;
+
+	kgsl_sync_timeline_signal(ev->context->timeline, ev->timestamp);
+	kgsl_context_put(ev->context);
+	kfree(ev);
+}
+
+static int _add_fence_event(struct kgsl_device *device,
+	struct kgsl_context *context, unsigned int timestamp)
+{
+	struct kgsl_fence_event_priv *event;
+	int ret;
+
+	event = kmalloc(sizeof(*event), GFP_KERNEL);
+	if (event == NULL)
+		return -ENOMEM;
+
+	/*
+	 * Increase the refcount for the context to keep it through the
+	 * callback
+	 */
+	if (!_kgsl_context_get(context)) {
+		kfree(event);
+		return -ENOENT;
+	}
+
+	event->context = context;
+	event->timestamp = timestamp;
+
+	ret = kgsl_add_event(device, &context->events, timestamp,
+		kgsl_fence_event_cb, event);
+
+	if (ret) {
+		kgsl_context_put(context);
+		kfree(event);
+	}
+
+	return ret;
+}
+
+/**
+ * kgsl_add_fence_event - Create a new fence event
+ * @device: KGSL device to create the event on
+ * @context_id: ID of the context that will trigger the event
+ * @timestamp: Timestamp to trigger the event
+ * @data: Return fence fd stored in struct kgsl_timestamp_event_fence
+ * @len: length of the fence event
+ * @owner: driver instance that owns this event
+ * @returns 0 on success or error code on error
+ *
+ * Create a fence and register an event to signal the fence when
+ * the timestamp expires
+ */
+
+int kgsl_add_fence_event(struct kgsl_device *device,
+	u32 context_id, u32 timestamp, void __user *data, int len,
+	struct kgsl_device_private *owner)
+{
+	struct kgsl_timestamp_event_fence priv;
+	struct kgsl_context *context;
+	struct sync_pt *pt;
+	struct sync_fence *fence = NULL;
+	int ret = -EINVAL;
+	char fence_name[sizeof(fence->name)] = {};
+	unsigned int cur;
+
+	priv.fence_fd = -1;
+
+	if (len != sizeof(priv))
+		return -EINVAL;
+
+	context = kgsl_context_get_owner(owner, context_id);
+
+	if (context == NULL)
+		return -EINVAL;
+
+	if (test_bit(KGSL_CONTEXT_PRIV_INVALID, &context->priv))
+		goto out;
+
+	pt = kgsl_sync_pt_create(context->timeline, context, timestamp);
+	if (pt == NULL) {
+		KGSL_DRV_CRIT_RATELIMIT(device, "kgsl_sync_pt_create failed\n");
+		ret = -ENOMEM;
+		goto out;
+	}
+	snprintf(fence_name, sizeof(fence_name),
+		"%s-pid-%d-ctx-%d-ts-%d",
+		device->name, current->group_leader->pid,
+		context_id, timestamp);
+
+	fence = sync_fence_create(fence_name, pt);
+	if (fence == NULL) {
+		/* only destroy pt when not added to fence */
+		kgsl_sync_pt_destroy(pt);
+		KGSL_DRV_CRIT_RATELIMIT(device, "sync_fence_create failed\n");
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	priv.fence_fd = get_unused_fd_flags(0);
+	if (priv.fence_fd < 0) {
+		KGSL_DRV_CRIT_RATELIMIT(device,
+			"Unable to get a file descriptor: %d\n",
+			priv.fence_fd);
+		ret = priv.fence_fd;
+		goto out;
+	}
+
+	/*
+	 * If the timestamp hasn't expired yet create an event to trigger it.
+	 * Otherwise, just signal the fence - there is no reason to go through
+	 * the effort of creating an event we don't need.
+	 */
+
+	kgsl_readtimestamp(device, context, KGSL_TIMESTAMP_RETIRED, &cur);
+
+	if (timestamp_cmp(cur, timestamp) >= 0) {
+		ret = 0;
+		kgsl_sync_timeline_signal(context->timeline, cur);
+	} else {
+		ret = _add_fence_event(device, context, timestamp);
+		if (ret)
+			goto out;
+	}
+
+	if (copy_to_user(data, &priv, sizeof(priv))) {
+		ret = -EFAULT;
+		goto out;
+	}
+	sync_fence_install(fence, priv.fence_fd);
+out:
+	kgsl_context_put(context);
+	if (ret) {
+		if (priv.fence_fd >= 0)
+			put_unused_fd(priv.fence_fd);
+
+		if (fence)
+			sync_fence_put(fence);
+	}
+	return ret;
+}
+
+static unsigned int kgsl_sync_get_timestamp(
+	struct kgsl_sync_timeline *ktimeline, enum kgsl_timestamp_type type)
+{
+	unsigned int ret = 0;
+	struct kgsl_context *context;
+
+	if (ktimeline->device == NULL)
+		return 0;
+
+	context = kgsl_context_get(ktimeline->device,
+			ktimeline->context_id);
+
+	if (context)
+		kgsl_readtimestamp(ktimeline->device, context, type, &ret);
+
+	kgsl_context_put(context);
+	return ret;
+}
+
+static void kgsl_sync_timeline_value_str(struct sync_timeline *sync_timeline,
+					 char *str, int size)
+{
+	struct kgsl_sync_timeline *ktimeline =
+		(struct kgsl_sync_timeline *) sync_timeline;
+
+	/*
+	 * This callback can be called before the device and spinlock are
+	 * initialized in struct kgsl_sync_timeline. kgsl_sync_get_timestamp()
+	 * will check if device is NULL and return 0. Queued and retired
+	 * timestamp of the context will be reported as 0, which is correct
+	 * because the context and timeline are just getting initialized.
+	 */
+	unsigned int timestamp_retired = kgsl_sync_get_timestamp(ktimeline,
+		KGSL_TIMESTAMP_RETIRED);
+	unsigned int timestamp_queued = kgsl_sync_get_timestamp(ktimeline,
+		KGSL_TIMESTAMP_QUEUED);
+
+	snprintf(str, size, "%u queued:%u retired:%u",
+		ktimeline->last_timestamp,
+		timestamp_queued, timestamp_retired);
+}
+
+static void kgsl_sync_pt_value_str(struct sync_pt *sync_pt,
+				   char *str, int size)
+{
+	struct kgsl_sync_pt *kpt = (struct kgsl_sync_pt *) sync_pt;
+
+	snprintf(str, size, "%u", kpt->timestamp);
+}
+
+static int kgsl_sync_fill_driver_data(struct sync_pt *sync_pt, void *data,
+					int size)
+{
+	struct kgsl_sync_pt *kpt = (struct kgsl_sync_pt *) sync_pt;
+
+	if (size < sizeof(kpt->timestamp))
+		return -ENOMEM;
+
+	memcpy(data, &kpt->timestamp, sizeof(kpt->timestamp));
+	return sizeof(kpt->timestamp);
+}
+
+static void kgsl_sync_timeline_release_obj(struct sync_timeline *sync_timeline)
+{
+	/*
+	 * Make sure to free the timeline only after destroy flag is set.
+	 * This is to avoid further accessing to the timeline from KGSL and
+	 * also to catch any unbalanced kref of timeline.
+	 */
+	BUG_ON(sync_timeline && (sync_timeline->destroyed != true));
+}
+
+static const struct sync_timeline_ops kgsl_sync_timeline_ops = {
+	.driver_name = "kgsl-timeline",
+	.dup = kgsl_sync_pt_dup,
+	.has_signaled = kgsl_sync_pt_has_signaled,
+	.compare = kgsl_sync_pt_compare,
+	.timeline_value_str = kgsl_sync_timeline_value_str,
+	.pt_value_str = kgsl_sync_pt_value_str,
+	.fill_driver_data = kgsl_sync_fill_driver_data,
+	.release_obj = kgsl_sync_timeline_release_obj,
+};
+
+int kgsl_sync_timeline_create(struct kgsl_context *context)
+{
+	struct kgsl_sync_timeline *ktimeline;
+
+	/*
+	 * Generate a name which includes the thread name, thread id, process
+	 * name, process id, and context id. This makes it possible to
+	 * identify the context of a timeline in the sync dump.
+	 */
+	char ktimeline_name[sizeof(context->timeline->name)] = {};
+
+	snprintf(ktimeline_name, sizeof(ktimeline_name),
+		"%s_%.15s(%d)-%.15s(%d)-%d",
+		context->device->name,
+		current->group_leader->comm, current->group_leader->pid,
+		current->comm, current->pid, context->id);
+
+	context->timeline = sync_timeline_create(&kgsl_sync_timeline_ops,
+		(int) sizeof(struct kgsl_sync_timeline), ktimeline_name);
+	if (context->timeline == NULL)
+		return -EINVAL;
+
+	ktimeline = (struct kgsl_sync_timeline *) context->timeline;
+	ktimeline->last_timestamp = 0;
+	ktimeline->device = context->device;
+	ktimeline->context_id = context->id;
+
+	spin_lock_init(&ktimeline->lock);
+	return 0;
+}
+
+static void kgsl_sync_timeline_signal(struct sync_timeline *timeline,
+	unsigned int timestamp)
+{
+	struct kgsl_sync_timeline *ktimeline =
+		(struct kgsl_sync_timeline *) timeline;
+
+	spin_lock(&ktimeline->lock);
+	if (timestamp_cmp(timestamp, ktimeline->last_timestamp) > 0)
+		ktimeline->last_timestamp = timestamp;
+	spin_unlock(&ktimeline->lock);
+
+	sync_timeline_signal(timeline);
+}
+
+void kgsl_sync_timeline_destroy(struct kgsl_context *context)
+{
+	sync_timeline_destroy(context->timeline);
+}
+
+static void kgsl_sync_callback(struct sync_fence *fence,
+	struct sync_fence_waiter *waiter)
+{
+	struct kgsl_sync_fence_waiter *kwaiter =
+		(struct kgsl_sync_fence_waiter *) waiter;
+	kwaiter->func(kwaiter->priv);
+	sync_fence_put(kwaiter->fence);
+	kfree(kwaiter);
+}
+
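+/*
+ * Arm an asynchronous wait on the fence backing fd.  Returns the waiter on
+ * success, NULL if the fence has already signaled (in which case func is
+ * never called), or an ERR_PTR() on failure.
+ */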
+struct kgsl_sync_fence_waiter *kgsl_sync_fence_async_wait(int fd,
+	void (*func)(void *priv), void *priv)
+{
+	struct kgsl_sync_fence_waiter *kwaiter;
+	struct sync_fence *fence;
+	int status;
+
+	fence = sync_fence_fdget(fd);
+	if (fence == NULL)
+		return ERR_PTR(-EINVAL);
+
+	/* create the waiter */
+	kwaiter = kzalloc(sizeof(*kwaiter), GFP_ATOMIC);
+	if (kwaiter == NULL) {
+		sync_fence_put(fence);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	kwaiter->fence = fence;
+	kwaiter->priv = priv;
+	kwaiter->func = func;
+
+	strlcpy(kwaiter->name, fence->name, sizeof(kwaiter->name));
+
+	sync_fence_waiter_init((struct sync_fence_waiter *) kwaiter,
+		kgsl_sync_callback);
+
+	/* if status then error or signaled */
+	status = sync_fence_wait_async(fence,
+		(struct sync_fence_waiter *) kwaiter);
+	if (status) {
+		kfree(kwaiter);
+		sync_fence_put(fence);
+		if (status < 0)
+			kwaiter = ERR_PTR(status);
+		else
+			kwaiter = NULL;
+	}
+
+	return kwaiter;
+}
+
+int kgsl_sync_fence_async_cancel(struct kgsl_sync_fence_waiter *kwaiter)
+{
+	if (kwaiter == NULL)
+		return 0;
+
+	if (sync_fence_cancel_async(kwaiter->fence,
+		(struct sync_fence_waiter *) kwaiter) == 0) {
+		sync_fence_put(kwaiter->fence);
+		kfree(kwaiter);
+		return 1;
+	}
+	return 0;
+}
+
+#ifdef CONFIG_ONESHOT_SYNC
+
+#include "oneshot_sync.h"
+
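+/*
+ * A syncsource is a userspace driven oneshot timeline: userspace creates a
+ * syncsource, creates fences against it and explicitly signals each fence
+ * when the corresponding work completes, all through KGSL ioctls.
+ */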
+struct kgsl_syncsource {
+	struct kref refcount;
+	int id;
+	struct kgsl_process_private *private;
+	struct oneshot_sync_timeline *oneshot;
+};
+
+long kgsl_ioctl_syncsource_create(struct kgsl_device_private *dev_priv,
+					unsigned int cmd, void *data)
+{
+	struct kgsl_syncsource *syncsource = NULL;
+	struct kgsl_syncsource_create *param = data;
+	int ret = -EINVAL;
+	int id = 0;
+	struct kgsl_process_private *private = dev_priv->process_priv;
+	char name[32];
+
+	syncsource = kzalloc(sizeof(*syncsource), GFP_KERNEL);
+	if (syncsource == NULL) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	snprintf(name, sizeof(name), "kgsl-syncsource-pid-%d",
+			current->group_leader->pid);
+
+	syncsource->oneshot = oneshot_timeline_create(name);
+	if (syncsource->oneshot == NULL) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	kref_init(&syncsource->refcount);
+	syncsource->private = private;
+
+	idr_preload(GFP_KERNEL);
+	spin_lock(&private->syncsource_lock);
+	id = idr_alloc(&private->syncsource_idr, syncsource, 1, 0, GFP_NOWAIT);
+	if (id > 0) {
+		syncsource->id = id;
+		param->id = id;
+		ret = 0;
+	} else {
+		ret = id;
+	}
+
+	spin_unlock(&private->syncsource_lock);
+	idr_preload_end();
+
+out:
+	if (ret) {
+		if (syncsource && syncsource->oneshot)
+			oneshot_timeline_destroy(syncsource->oneshot);
+		kfree(syncsource);
+	}
+
+	return ret;
+}
+
+static struct kgsl_syncsource *
+kgsl_syncsource_get(struct kgsl_process_private *private, int id)
+{
+	int result = 0;
+	struct kgsl_syncsource *syncsource = NULL;
+
+	spin_lock(&private->syncsource_lock);
+
+	syncsource = idr_find(&private->syncsource_idr, id);
+	if (syncsource)
+		result = kref_get_unless_zero(&syncsource->refcount);
+
+	spin_unlock(&private->syncsource_lock);
+
+	return result ? syncsource : NULL;
+}
+
+static void kgsl_syncsource_destroy(struct kref *kref)
+{
+	struct kgsl_syncsource *syncsource = container_of(kref,
+						struct kgsl_syncsource,
+						refcount);
+
+	struct kgsl_process_private *private = syncsource->private;
+
+	spin_lock(&private->syncsource_lock);
+	if (syncsource->id != 0) {
+		idr_remove(&private->syncsource_idr, syncsource->id);
+		syncsource->id = 0;
+	}
+	oneshot_timeline_destroy(syncsource->oneshot);
+	spin_unlock(&private->syncsource_lock);
+
+	kfree(syncsource);
+}
+
+void kgsl_syncsource_put(struct kgsl_syncsource *syncsource)
+{
+	if (syncsource)
+		kref_put(&syncsource->refcount, kgsl_syncsource_destroy);
+}
+
+long kgsl_ioctl_syncsource_destroy(struct kgsl_device_private *dev_priv,
+					unsigned int cmd, void *data)
+{
+	struct kgsl_syncsource_destroy *param = data;
+	struct kgsl_syncsource *syncsource = NULL;
+	struct kgsl_process_private *private = dev_priv->process_priv;
+
+	spin_lock(&private->syncsource_lock);
+	syncsource = idr_find(&private->syncsource_idr, param->id);
+
+	if (syncsource) {
+		idr_remove(&private->syncsource_idr, param->id);
+		syncsource->id = 0;
+	}
+
+	spin_unlock(&private->syncsource_lock);
+
+	if (syncsource == NULL)
+		return -EINVAL;
+
+	/* put reference from syncsource creation */
+	kgsl_syncsource_put(syncsource);
+	return 0;
+}
+
+long kgsl_ioctl_syncsource_create_fence(struct kgsl_device_private *dev_priv,
+					unsigned int cmd, void *data)
+{
+	struct kgsl_syncsource_create_fence *param = data;
+	struct kgsl_syncsource *syncsource = NULL;
+	int ret = -EINVAL;
+	struct sync_fence *fence = NULL;
+	int fd = -1;
+	char name[32];
+
+	syncsource = kgsl_syncsource_get(dev_priv->process_priv,
+					param->id);
+	if (syncsource == NULL)
+		goto out;
+
+	snprintf(name, sizeof(name), "kgsl-syncsource-pid-%d-%d",
+			current->group_leader->pid, syncsource->id);
+
+	fence = oneshot_fence_create(syncsource->oneshot, name);
+	if (fence == NULL) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	fd = get_unused_fd_flags(0);
+	if (fd < 0) {
+		ret = -EBADF;
+		goto out;
+	}
+	ret = 0;
+
+	sync_fence_install(fence, fd);
+
+	param->fence_fd = fd;
+out:
+	if (ret) {
+		if (fence)
+			sync_fence_put(fence);
+		if (fd >= 0)
+			put_unused_fd(fd);
+
+	}
+	kgsl_syncsource_put(syncsource);
+	return ret;
+}
+
+long kgsl_ioctl_syncsource_signal_fence(struct kgsl_device_private *dev_priv,
+					unsigned int cmd, void *data)
+{
+	int ret = -EINVAL;
+	struct kgsl_syncsource_signal_fence *param = data;
+	struct kgsl_syncsource *syncsource = NULL;
+	struct sync_fence *fence = NULL;
+
+	syncsource = kgsl_syncsource_get(dev_priv->process_priv,
+					param->id);
+	if (syncsource == NULL)
+		goto out;
+
+	fence = sync_fence_fdget(param->fence_fd);
+	if (fence == NULL) {
+		ret = -EBADF;
+		goto out;
+	}
+
+	ret = oneshot_fence_signal(syncsource->oneshot, fence);
+out:
+	if (fence)
+		sync_fence_put(fence);
+	kgsl_syncsource_put(syncsource);
+	return ret;
+}
+#endif
diff --git a/drivers/gpu/msm/kgsl_sync.h b/drivers/gpu/msm/kgsl_sync.h
new file mode 100644
index 0000000..68b4246
--- /dev/null
+++ b/drivers/gpu/msm/kgsl_sync.h
@@ -0,0 +1,139 @@
+/* Copyright (c) 2012-2014, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+#ifndef __KGSL_SYNC_H
+#define __KGSL_SYNC_H
+
+#include "sync.h"
+#include "kgsl_device.h"
+
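+/*
+ * kgsl_sync_timeline wraps an Android sync_timeline for one kgsl context
+ * and tracks the last signalled timestamp; kgsl_sync_pt is the matching
+ * sync_pt bound to a context/timestamp pair, and kgsl_sync_fence_waiter
+ * carries the callback invoked when an external fence signals.
+ */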
+struct kgsl_sync_timeline {
+	struct sync_timeline timeline;
+	unsigned int last_timestamp;
+	struct kgsl_device *device;
+	u32 context_id;
+	spinlock_t lock;
+};
+
+struct kgsl_sync_pt {
+	struct sync_pt pt;
+	struct kgsl_context *context;
+	unsigned int timestamp;
+};
+
+struct kgsl_sync_fence_waiter {
+	struct sync_fence_waiter waiter;
+	struct sync_fence *fence;
+	char name[32];
+	void (*func)(void *priv);
+	void *priv;
+};
+
+struct kgsl_syncsource;
+
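+/*
+ * When CONFIG_SYNC is disabled the inline stubs below replace the
+ * implementations in kgsl_sync.c so that callers need no ifdefs; the
+ * oneshot syncsource ioctls further down follow the same pattern for
+ * CONFIG_ONESHOT_SYNC.
+ */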
+#if defined(CONFIG_SYNC)
+int kgsl_add_fence_event(struct kgsl_device *device,
+	u32 context_id, u32 timestamp, void __user *data, int len,
+	struct kgsl_device_private *owner);
+int kgsl_sync_timeline_create(struct kgsl_context *context);
+void kgsl_sync_timeline_destroy(struct kgsl_context *context);
+struct kgsl_sync_fence_waiter *kgsl_sync_fence_async_wait(int fd,
+	void (*func)(void *priv), void *priv);
+int kgsl_sync_fence_async_cancel(struct kgsl_sync_fence_waiter *waiter);
+static inline void kgsl_sync_fence_log(struct sync_fence *fence)
+{
+}
+#else
+static inline int kgsl_add_fence_event(struct kgsl_device *device,
+	u32 context_id, u32 timestamp, void __user *data, int len,
+	struct kgsl_device_private *owner)
+{
+	return -EINVAL;
+}
+
+static inline int kgsl_sync_timeline_create(struct kgsl_context *context)
+{
+	context->timeline = NULL;
+	return 0;
+}
+
+static inline void kgsl_sync_timeline_destroy(struct kgsl_context *context)
+{
+}
+
+static inline struct kgsl_sync_fence_waiter *kgsl_sync_fence_async_wait(
+	int fd, void (*func)(void *priv), void *priv)
+{
+	return NULL;
+}
+
+static inline int
+kgsl_sync_fence_async_cancel(struct kgsl_sync_fence_waiter *waiter)
+{
+	return 1;
+}
+
+static inline void kgsl_sync_fence_log(struct sync_fence *fence)
+{
+}
+
+#endif
+
+#ifdef CONFIG_ONESHOT_SYNC
+long kgsl_ioctl_syncsource_create(struct kgsl_device_private *dev_priv,
+					unsigned int cmd, void *data);
+long kgsl_ioctl_syncsource_destroy(struct kgsl_device_private *dev_priv,
+					unsigned int cmd, void *data);
+long kgsl_ioctl_syncsource_create_fence(struct kgsl_device_private *dev_priv,
+					unsigned int cmd, void *data);
+long kgsl_ioctl_syncsource_signal_fence(struct kgsl_device_private *dev_priv,
+					unsigned int cmd, void *data);
+
+void kgsl_syncsource_put(struct kgsl_syncsource *syncsource);
+
+#else
+static inline long
+kgsl_ioctl_syncsource_create(struct kgsl_device_private *dev_priv,
+					unsigned int cmd, void *data)
+{
+	return -ENOIOCTLCMD;
+}
+
+static inline long
+kgsl_ioctl_syncsource_destroy(struct kgsl_device_private *dev_priv,
+					unsigned int cmd, void *data)
+{
+	return -ENOIOCTLCMD;
+}
+
+static inline long
+kgsl_ioctl_syncsource_create_fence(struct kgsl_device_private *dev_priv,
+					unsigned int cmd, void *data)
+{
+	return -ENOIOCTLCMD;
+}
+
+static inline long
+kgsl_ioctl_syncsource_signal_fence(struct kgsl_device_private *dev_priv,
+					unsigned int cmd, void *data)
+{
+	return -ENOIOCTLCMD;
+}
+
+static inline void kgsl_syncsource_put(struct kgsl_syncsource *syncsource)
+{
+}
+#endif
+
+#endif /* __KGSL_SYNC_H */
diff --git a/drivers/gpu/msm/kgsl_trace.c b/drivers/gpu/msm/kgsl_trace.c
new file mode 100644
index 0000000..3541425
--- /dev/null
+++ b/drivers/gpu/msm/kgsl_trace.c
@@ -0,0 +1,26 @@
+/* Copyright (c) 2011, 2013, 2015 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#include <linux/module.h>
+
+#include "kgsl.h"
+#include "kgsl_device.h"
+
+/* Instantiate tracepoints */
+#define CREATE_TRACE_POINTS
+#include "kgsl_trace.h"
+
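+/*
+ * These tracepoints are exported so that they can also be fired from
+ * outside this translation unit (e.g. from the separately built adreno
+ * module).
+ */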
+EXPORT_TRACEPOINT_SYMBOL(kgsl_regwrite);
+EXPORT_TRACEPOINT_SYMBOL(kgsl_issueibcmds);
+EXPORT_TRACEPOINT_SYMBOL(kgsl_user_pwrlevel_constraint);
+EXPORT_TRACEPOINT_SYMBOL(kgsl_constraint);
diff --git a/drivers/gpu/msm/kgsl_trace.h b/drivers/gpu/msm/kgsl_trace.h
new file mode 100644
index 0000000..cdcc028
--- /dev/null
+++ b/drivers/gpu/msm/kgsl_trace.h
@@ -0,0 +1,1233 @@
+/* Copyright (c) 2011-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#if !defined(_KGSL_TRACE_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _KGSL_TRACE_H
+
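+/*
+ * Standard ftrace header boilerplate: the guard above lets this file be
+ * re-read when TRACE_HEADER_MULTI_READ is defined, and the defines below
+ * tell <trace/define_trace.h> the trace system name and where to find
+ * this header when the tracepoints are instantiated from kgsl_trace.c.
+ */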
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM kgsl
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE kgsl_trace
+
+#include <linux/tracepoint.h>
+#include "kgsl_device.h"
+#include "adreno_drawctxt.h"
+
+struct kgsl_device;
+struct kgsl_ringbuffer_issueibcmds;
+struct kgsl_device_waittimestamp;
+
+/*
+ * Tracepoint for kgsl issue ib commands
+ */
+TRACE_EVENT(kgsl_issueibcmds,
+
+	TP_PROTO(struct kgsl_device *device,
+			int drawctxt_id,
+			unsigned int numibs,
+			int timestamp,
+			int flags,
+			int result,
+			unsigned int type),
+
+	TP_ARGS(device, drawctxt_id, numibs, timestamp,
+		flags, result, type),
+
+	TP_STRUCT__entry(
+		__string(device_name, device->name)
+		__field(unsigned int, drawctxt_id)
+		__field(unsigned int, numibs)
+		__field(unsigned int, timestamp)
+		__field(unsigned int, flags)
+		__field(int, result)
+		__field(unsigned int, drawctxt_type)
+	),
+
+	TP_fast_assign(
+		__assign_str(device_name, device->name);
+		__entry->drawctxt_id = drawctxt_id;
+		__entry->numibs = numibs;
+		__entry->timestamp = timestamp;
+		__entry->flags = flags;
+		__entry->result = result;
+		__entry->drawctxt_type = type;
+	),
+
+	TP_printk(
+		"d_name=%s ctx=%u ib=0x0 numibs=%u ts=%u flags=%s result=%d type=%s",
+		__get_str(device_name),
+		__entry->drawctxt_id,
+		__entry->numibs,
+		__entry->timestamp,
+		__entry->flags ? __print_flags(__entry->flags, "|",
+						KGSL_DRAWOBJ_FLAGS) : "None",
+		__entry->result,
+		__print_symbolic(__entry->drawctxt_type, KGSL_CONTEXT_TYPES)
+	)
+);
+
+/*
+ * Tracepoint for kgsl readtimestamp
+ */
+TRACE_EVENT(kgsl_readtimestamp,
+
+	TP_PROTO(struct kgsl_device *device,
+			unsigned int context_id,
+			unsigned int type,
+			unsigned int timestamp),
+
+	TP_ARGS(device, context_id, type, timestamp),
+
+	TP_STRUCT__entry(
+		__string(device_name, device->name)
+		__field(unsigned int, context_id)
+		__field(unsigned int, type)
+		__field(unsigned int, timestamp)
+	),
+
+	TP_fast_assign(
+		__assign_str(device_name, device->name);
+		__entry->context_id = context_id;
+		__entry->type = type;
+		__entry->timestamp = timestamp;
+	),
+
+	TP_printk(
+		"d_name=%s context_id=%u type=%u ts=%u",
+		__get_str(device_name),
+		__entry->context_id,
+		__entry->type,
+		__entry->timestamp
+	)
+);
+
+/*
+ * Tracepoint for kgsl waittimestamp entry
+ */
+TRACE_EVENT(kgsl_waittimestamp_entry,
+
+	TP_PROTO(struct kgsl_device *device,
+			unsigned int context_id,
+			unsigned int curr_ts,
+			unsigned int wait_ts,
+			unsigned int timeout),
+
+	TP_ARGS(device, context_id, curr_ts, wait_ts, timeout),
+
+	TP_STRUCT__entry(
+		__string(device_name, device->name)
+		__field(unsigned int, context_id)
+		__field(unsigned int, curr_ts)
+		__field(unsigned int, wait_ts)
+		__field(unsigned int, timeout)
+	),
+
+	TP_fast_assign(
+		__assign_str(device_name, device->name);
+		__entry->context_id = context_id;
+		__entry->curr_ts = curr_ts;
+		__entry->wait_ts = wait_ts;
+		__entry->timeout = timeout;
+	),
+
+	TP_printk(
+		"d_name=%s ctx=%u curr_ts=%u ts=%u timeout=%u",
+		__get_str(device_name),
+		__entry->context_id,
+		__entry->curr_ts,
+		__entry->wait_ts,
+		__entry->timeout
+	)
+);
+
+/*
+ * Tracepoint for kgsl waittimestamp exit
+ */
+TRACE_EVENT(kgsl_waittimestamp_exit,
+
+	TP_PROTO(struct kgsl_device *device, unsigned int curr_ts,
+		 int result),
+
+	TP_ARGS(device, curr_ts, result),
+
+	TP_STRUCT__entry(
+		__string(device_name, device->name)
+		__field(unsigned int, curr_ts)
+		__field(int, result)
+	),
+
+	TP_fast_assign(
+		__assign_str(device_name, device->name);
+		__entry->curr_ts = curr_ts;
+		__entry->result = result;
+	),
+
+	TP_printk(
+		"d_name=%s curr_ts=%u result=%d",
+		__get_str(device_name),
+		__entry->curr_ts,
+		__entry->result
+	)
+);
+
+DECLARE_EVENT_CLASS(kgsl_pwr_template,
+	TP_PROTO(struct kgsl_device *device, int on),
+
+	TP_ARGS(device, on),
+
+	TP_STRUCT__entry(
+		__string(device_name, device->name)
+		__field(int, on)
+	),
+
+	TP_fast_assign(
+		__assign_str(device_name, device->name);
+		__entry->on = on;
+	),
+
+	TP_printk(
+		"d_name=%s flag=%s",
+		__get_str(device_name),
+		__entry->on ? "on" : "off"
+	)
+);
+
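+/*
+ * kgsl_irq, kgsl_bus and kgsl_rail all reuse kgsl_pwr_template above and
+ * therefore share its fields and "d_name=... flag=on|off" output format.
+ */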
+DEFINE_EVENT(kgsl_pwr_template, kgsl_irq,
+	TP_PROTO(struct kgsl_device *device, int on),
+	TP_ARGS(device, on)
+);
+
+DEFINE_EVENT(kgsl_pwr_template, kgsl_bus,
+	TP_PROTO(struct kgsl_device *device, int on),
+	TP_ARGS(device, on)
+);
+
+DEFINE_EVENT(kgsl_pwr_template, kgsl_rail,
+	TP_PROTO(struct kgsl_device *device, int on),
+	TP_ARGS(device, on)
+);
+
+TRACE_EVENT(kgsl_clk,
+
+	TP_PROTO(struct kgsl_device *device, unsigned int on,
+		unsigned int freq),
+
+	TP_ARGS(device, on, freq),
+
+	TP_STRUCT__entry(
+		__string(device_name, device->name)
+		__field(int, on)
+		__field(unsigned int, freq)
+	),
+
+	TP_fast_assign(
+		__assign_str(device_name, device->name);
+		__entry->on = on;
+		__entry->freq = freq;
+	),
+
+	TP_printk(
+		"d_name=%s flag=%s active_freq=%d",
+		__get_str(device_name),
+		__entry->on ? "on" : "off",
+		__entry->freq
+	)
+);
+
+TRACE_EVENT(kgsl_pwrlevel,
+
+	TP_PROTO(struct kgsl_device *device,
+		unsigned int pwrlevel,
+		unsigned int freq,
+		unsigned int prev_pwrlevel,
+		unsigned int prev_freq),
+
+	TP_ARGS(device, pwrlevel, freq, prev_pwrlevel, prev_freq),
+
+	TP_STRUCT__entry(
+		__string(device_name, device->name)
+		__field(unsigned int, pwrlevel)
+		__field(unsigned int, freq)
+		__field(unsigned int, prev_pwrlevel)
+		__field(unsigned int, prev_freq)
+	),
+
+	TP_fast_assign(
+		__assign_str(device_name, device->name);
+		__entry->pwrlevel = pwrlevel;
+		__entry->freq = freq;
+		__entry->prev_pwrlevel = prev_pwrlevel;
+		__entry->prev_freq = prev_freq;
+	),
+
+	TP_printk(
+		"d_name=%s pwrlevel=%d freq=%d prev_pwrlevel=%d prev_freq=%d",
+		__get_str(device_name),
+		__entry->pwrlevel,
+		__entry->freq,
+		__entry->prev_pwrlevel,
+		__entry->prev_freq
+	)
+);
+
+TRACE_EVENT(kgsl_buslevel,
+
+	TP_PROTO(struct kgsl_device *device, unsigned int pwrlevel,
+		 unsigned int bus),
+
+	TP_ARGS(device, pwrlevel, bus),
+
+	TP_STRUCT__entry(
+		__string(device_name, device->name)
+		__field(unsigned int, pwrlevel)
+		__field(unsigned int, bus)
+	),
+
+	TP_fast_assign(
+		__assign_str(device_name, device->name);
+		__entry->pwrlevel = pwrlevel;
+		__entry->bus = bus;
+	),
+
+	TP_printk(
+		"d_name=%s pwrlevel=%d bus=%d",
+		__get_str(device_name),
+		__entry->pwrlevel,
+		__entry->bus
+	)
+);
+
+TRACE_EVENT(kgsl_gpubusy,
+	TP_PROTO(struct kgsl_device *device, unsigned int busy,
+		unsigned int elapsed),
+
+	TP_ARGS(device, busy, elapsed),
+
+	TP_STRUCT__entry(
+		__string(device_name, device->name)
+		__field(unsigned int, busy)
+		__field(unsigned int, elapsed)
+	),
+
+	TP_fast_assign(
+		__assign_str(device_name, device->name);
+		__entry->busy = busy;
+		__entry->elapsed = elapsed;
+	),
+
+	TP_printk(
+		"d_name=%s busy=%u elapsed=%d",
+		__get_str(device_name),
+		__entry->busy,
+		__entry->elapsed
+	)
+);
+
+TRACE_EVENT(kgsl_pwrstats,
+	TP_PROTO(struct kgsl_device *device, s64 time,
+		struct kgsl_power_stats *pstats, u32 ctxt_count),
+
+	TP_ARGS(device, time, pstats, ctxt_count),
+
+	TP_STRUCT__entry(
+		__string(device_name, device->name)
+		__field(s64, total_time)
+		__field(u64, busy_time)
+		__field(u64, ram_time)
+		__field(u64, ram_wait)
+		__field(u32, context_count)
+	),
+
+	TP_fast_assign(
+		__assign_str(device_name, device->name);
+		__entry->total_time = time;
+		__entry->busy_time = pstats->busy_time;
+		__entry->ram_time = pstats->ram_time;
+		__entry->ram_wait = pstats->ram_wait;
+		__entry->context_count = ctxt_count;
+	),
+
+	TP_printk(
+		"d_name=%s total=%lld busy=%lld ram_time=%lld ram_wait=%lld context_count=%u",
+		__get_str(device_name), __entry->total_time, __entry->busy_time,
+		__entry->ram_time, __entry->ram_wait, __entry->context_count
+	)
+);
+
+DECLARE_EVENT_CLASS(kgsl_pwrstate_template,
+	TP_PROTO(struct kgsl_device *device, unsigned int state),
+
+	TP_ARGS(device, state),
+
+	TP_STRUCT__entry(
+		__string(device_name, device->name)
+		__field(unsigned int, state)
+	),
+
+	TP_fast_assign(
+		__assign_str(device_name, device->name);
+		__entry->state = state;
+	),
+
+	TP_printk(
+		"d_name=%s state=%s",
+		__get_str(device_name),
+		kgsl_pwrstate_to_str(__entry->state)
+	)
+);
+
+DEFINE_EVENT(kgsl_pwrstate_template, kgsl_pwr_set_state,
+	TP_PROTO(struct kgsl_device *device, unsigned int state),
+	TP_ARGS(device, state)
+);
+
+DEFINE_EVENT(kgsl_pwrstate_template, kgsl_pwr_request_state,
+	TP_PROTO(struct kgsl_device *device, unsigned int state),
+	TP_ARGS(device, state)
+);
+
+TRACE_EVENT(kgsl_mem_alloc,
+
+	TP_PROTO(struct kgsl_mem_entry *mem_entry),
+
+	TP_ARGS(mem_entry),
+
+	TP_STRUCT__entry(
+		__field(uint64_t, gpuaddr)
+		__field(uint64_t, size)
+		__field(unsigned int, tgid)
+		__array(char, usage, 16)
+		__field(unsigned int, id)
+		__field(uint64_t, flags)
+	),
+
+	TP_fast_assign(
+		__entry->gpuaddr = mem_entry->memdesc.gpuaddr;
+		__entry->size = mem_entry->memdesc.size;
+		__entry->tgid = mem_entry->priv->pid;
+		kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage),
+				     mem_entry->memdesc.flags);
+		__entry->id = mem_entry->id;
+		__entry->flags = mem_entry->memdesc.flags;
+	),
+
+	TP_printk(
+		"gpuaddr=0x%llx size=%llu tgid=%u usage=%s id=%u flags=0x%llx",
+		__entry->gpuaddr, __entry->size, __entry->tgid,
+		__entry->usage, __entry->id, __entry->flags
+	)
+);
+
+TRACE_EVENT(kgsl_mem_mmap,
+
+	TP_PROTO(struct kgsl_mem_entry *mem_entry),
+
+	TP_ARGS(mem_entry),
+
+	TP_STRUCT__entry(
+		__field(unsigned long, useraddr)
+		__field(uint64_t, gpuaddr)
+		__field(uint64_t, size)
+		__array(char, usage, 16)
+		__field(unsigned int, id)
+		__field(uint64_t, flags)
+	),
+
+	TP_fast_assign(
+		__entry->useraddr = mem_entry->memdesc.useraddr;
+		__entry->gpuaddr = mem_entry->memdesc.gpuaddr;
+		__entry->size = mem_entry->memdesc.size;
+		kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage),
+				     mem_entry->memdesc.flags);
+		__entry->id = mem_entry->id;
+		__entry->flags = mem_entry->memdesc.flags;
+	),
+
+	TP_printk(
+	 "useraddr=0x%lx gpuaddr=0x%llx size=%llu usage=%s id=%u flags=0x%llx",
+		__entry->useraddr, __entry->gpuaddr, __entry->size,
+		__entry->usage, __entry->id, __entry->flags
+	)
+);
+
+TRACE_EVENT(kgsl_mem_unmapped_area_collision,
+
+	TP_PROTO(struct kgsl_mem_entry *mem_entry,
+		 unsigned long addr,
+		 unsigned long len),
+
+	TP_ARGS(mem_entry, addr, len),
+
+	TP_STRUCT__entry(
+		__field(unsigned int, id)
+		__field(unsigned long, addr)
+		__field(unsigned long, len)
+	),
+
+	TP_fast_assign(
+		__entry->id = mem_entry->id;
+		__entry->len = len;
+		__entry->addr = addr;
+	),
+
+	TP_printk(
+		"id=%u len=%lu addr=0x%lx",
+		__entry->id, __entry->len, __entry->addr
+	)
+);
+
+TRACE_EVENT(kgsl_mem_map,
+
+	TP_PROTO(struct kgsl_mem_entry *mem_entry, int fd),
+
+	TP_ARGS(mem_entry, fd),
+
+	TP_STRUCT__entry(
+		__field(uint64_t, gpuaddr)
+		__field(uint64_t, size)
+		__field(int, fd)
+		__field(int, type)
+		__field(unsigned int, tgid)
+		__array(char, usage, 16)
+		__field(unsigned int, id)
+	),
+
+	TP_fast_assign(
+		__entry->gpuaddr = mem_entry->memdesc.gpuaddr;
+		__entry->size = mem_entry->memdesc.size;
+		__entry->fd = fd;
+		__entry->type = kgsl_memdesc_usermem_type(&mem_entry->memdesc);
+		__entry->tgid = mem_entry->priv->pid;
+		kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage),
+				     mem_entry->memdesc.flags);
+		__entry->id = mem_entry->id;
+	),
+
+	TP_printk(
+		"gpuaddr=0x%llx size=%llu type=%s fd=%d tgid=%u usage=%s id=%u",
+		__entry->gpuaddr, __entry->size,
+		__print_symbolic(__entry->type, KGSL_MEM_TYPES),
+		__entry->fd, __entry->tgid,
+		__entry->usage, __entry->id
+	)
+);
+
+TRACE_EVENT(kgsl_mem_free,
+
+	TP_PROTO(struct kgsl_mem_entry *mem_entry),
+
+	TP_ARGS(mem_entry),
+
+	TP_STRUCT__entry(
+		__field(uint64_t, gpuaddr)
+		__field(uint64_t, size)
+		__field(int, type)
+		__field(int, fd)
+		__field(unsigned int, tgid)
+		__array(char, usage, 16)
+		__field(unsigned int, id)
+	),
+
+	TP_fast_assign(
+		__entry->gpuaddr = mem_entry->memdesc.gpuaddr;
+		__entry->size = mem_entry->memdesc.size;
+		__entry->type = kgsl_memdesc_usermem_type(&mem_entry->memdesc);
+		__entry->tgid = mem_entry->priv->pid;
+		kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage),
+				     mem_entry->memdesc.flags);
+		__entry->id = mem_entry->id;
+	),
+
+	TP_printk(
+		"gpuaddr=0x%llx size=%llu type=%s tgid=%u usage=%s id=%u",
+		__entry->gpuaddr, __entry->size,
+		__print_symbolic(__entry->type, KGSL_MEM_TYPES),
+		__entry->tgid, __entry->usage, __entry->id
+	)
+);
+
+TRACE_EVENT(kgsl_mem_sync_cache,
+
+	TP_PROTO(struct kgsl_mem_entry *mem_entry, uint64_t offset,
+		uint64_t length, unsigned int op),
+
+	TP_ARGS(mem_entry, offset, length, op),
+
+	TP_STRUCT__entry(
+		__field(uint64_t, gpuaddr)
+		__array(char, usage, 16)
+		__field(unsigned int, tgid)
+		__field(unsigned int, id)
+		__field(unsigned int, op)
+		__field(uint64_t, offset)
+		__field(uint64_t, length)
+	),
+
+	TP_fast_assign(
+		__entry->gpuaddr = mem_entry->memdesc.gpuaddr;
+		kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage),
+				     mem_entry->memdesc.flags);
+		__entry->tgid = mem_entry->priv->pid;
+		__entry->id = mem_entry->id;
+		__entry->op = op;
+		__entry->offset = offset;
+		__entry->length = (length == 0) ?
+				mem_entry->memdesc.size : length;
+	),
+
+	TP_printk(
+	 "gpuaddr=0x%llx size=%llu tgid=%u  usage=%s id=%u op=%c%c offset=%llu",
+		__entry->gpuaddr,  __entry->length,
+		__entry->tgid, __entry->usage, __entry->id,
+		(__entry->op & KGSL_GPUMEM_CACHE_CLEAN) ? 'c' : '.',
+		(__entry->op & KGSL_GPUMEM_CACHE_INV) ? 'i' : '.',
+		__entry->offset
+	)
+);
+
+TRACE_EVENT(kgsl_mem_sync_full_cache,
+
+	TP_PROTO(unsigned int num_bufs, uint64_t bulk_size),
+	TP_ARGS(num_bufs, bulk_size),
+
+	TP_STRUCT__entry(
+		__field(unsigned int, num_bufs)
+		__field(uint64_t, bulk_size)
+	),
+
+	TP_fast_assign(
+		__entry->num_bufs = num_bufs;
+		__entry->bulk_size = bulk_size;
+	),
+
+	TP_printk(
+		"num_bufs=%u bulk_size=%llu op=ci",
+		__entry->num_bufs, __entry->bulk_size
+	)
+);
+
+DECLARE_EVENT_CLASS(kgsl_mem_timestamp_template,
+
+	TP_PROTO(struct kgsl_device *device, struct kgsl_mem_entry *mem_entry,
+		unsigned int id, unsigned int curr_ts, unsigned int free_ts),
+
+	TP_ARGS(device, mem_entry, id, curr_ts, free_ts),
+
+	TP_STRUCT__entry(
+		__string(device_name, device->name)
+		__field(uint64_t, gpuaddr)
+		__field(uint64_t, size)
+		__field(int, type)
+		__array(char, usage, 16)
+		__field(unsigned int, id)
+		__field(unsigned int, drawctxt_id)
+		__field(unsigned int, curr_ts)
+		__field(unsigned int, free_ts)
+	),
+
+	TP_fast_assign(
+		__assign_str(device_name, device->name);
+		__entry->gpuaddr = mem_entry->memdesc.gpuaddr;
+		__entry->size = mem_entry->memdesc.size;
+		kgsl_get_memory_usage(__entry->usage, sizeof(__entry->usage),
+				     mem_entry->memdesc.flags);
+		__entry->id = mem_entry->id;
+		__entry->drawctxt_id = id;
+		__entry->type = kgsl_memdesc_usermem_type(&mem_entry->memdesc);
+		__entry->curr_ts = curr_ts;
+		__entry->free_ts = free_ts;
+	),
+
+	TP_printk(
+		"d_name=%s gpuaddr=0x%llx size=%llu type=%s usage=%s id=%u ctx=%u curr_ts=%u free_ts=%u",
+		__get_str(device_name),
+		__entry->gpuaddr,
+		__entry->size,
+		__print_symbolic(__entry->type, KGSL_MEM_TYPES),
+		__entry->usage,
+		__entry->id,
+		__entry->drawctxt_id,
+		__entry->curr_ts,
+		__entry->free_ts
+	)
+);
+
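+/*
+ * Both events reuse the class above: kgsl_mem_timestamp_queue marks a
+ * buffer queued to be freed once the context reaches free_ts, while
+ * kgsl_mem_timestamp_free marks the deferred free itself.
+ */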
+DEFINE_EVENT(kgsl_mem_timestamp_template, kgsl_mem_timestamp_queue,
+	TP_PROTO(struct kgsl_device *device, struct kgsl_mem_entry *mem_entry,
+		unsigned int id, unsigned int curr_ts, unsigned int free_ts),
+	TP_ARGS(device, mem_entry, id, curr_ts, free_ts)
+);
+
+DEFINE_EVENT(kgsl_mem_timestamp_template, kgsl_mem_timestamp_free,
+	TP_PROTO(struct kgsl_device *device, struct kgsl_mem_entry *mem_entry,
+		unsigned int id, unsigned int curr_ts, unsigned int free_ts),
+	TP_ARGS(device, mem_entry, id, curr_ts, free_ts)
+);
+
+TRACE_EVENT(kgsl_context_create,
+
+	TP_PROTO(struct kgsl_device *device, struct kgsl_context *context,
+		 unsigned int flags),
+
+	TP_ARGS(device, context, flags),
+
+	TP_STRUCT__entry(
+		__string(device_name, device->name)
+		__field(unsigned int, id)
+		__field(unsigned int, flags)
+		__field(unsigned int, priority)
+		__field(unsigned int, type)
+	),
+
+	TP_fast_assign(
+		__assign_str(device_name, device->name);
+		__entry->id = context->id;
+		__entry->flags = flags & ~(KGSL_CONTEXT_PRIORITY_MASK |
+						KGSL_CONTEXT_TYPE_MASK);
+		__entry->priority =
+			(flags & KGSL_CONTEXT_PRIORITY_MASK)
+				>> KGSL_CONTEXT_PRIORITY_SHIFT;
+		__entry->type =
+			(flags & KGSL_CONTEXT_TYPE_MASK)
+				>> KGSL_CONTEXT_TYPE_SHIFT;
+	),
+
+	TP_printk(
+		"d_name=%s ctx=%u flags=%s priority=%u type=%s",
+		__get_str(device_name), __entry->id,
+		__entry->flags ? __print_flags(__entry->flags, "|",
+						KGSL_CONTEXT_FLAGS) : "None",
+		__entry->priority,
+		__print_symbolic(__entry->type, KGSL_CONTEXT_TYPES)
+	)
+);
+
+TRACE_EVENT(kgsl_context_detach,
+
+	TP_PROTO(struct kgsl_device *device, struct kgsl_context *context),
+
+	TP_ARGS(device, context),
+
+	TP_STRUCT__entry(
+		__string(device_name, device->name)
+		__field(unsigned int, id)
+	),
+
+	TP_fast_assign(
+		__assign_str(device_name, device->name);
+		__entry->id = context->id;
+	),
+
+	TP_printk(
+		"d_name=%s ctx=%u",
+		__get_str(device_name), __entry->id
+	)
+);
+
+TRACE_EVENT(kgsl_context_destroy,
+
+	TP_PROTO(struct kgsl_device *device, struct kgsl_context *context),
+
+	TP_ARGS(device, context),
+
+	TP_STRUCT__entry(
+		__string(device_name, device->name)
+		__field(unsigned int, id)
+	),
+
+	TP_fast_assign(
+		__assign_str(device_name, device->name);
+		__entry->id = context->id;
+	),
+
+	TP_printk(
+		"d_name=%s ctx=%u",
+		__get_str(device_name), __entry->id
+	)
+);
+
+TRACE_EVENT(kgsl_user_pwrlevel_constraint,
+
+	TP_PROTO(struct kgsl_device *device, unsigned int id, unsigned int type,
+		unsigned int sub_type),
+
+	TP_ARGS(device, id, type, sub_type),
+
+	TP_STRUCT__entry(
+		__string(device_name, device->name)
+		__field(unsigned int, id)
+		__field(unsigned int, type)
+		__field(unsigned int, sub_type)
+	),
+
+	TP_fast_assign(
+		__assign_str(device_name, device->name);
+		__entry->id = id;
+		__entry->type = type;
+		__entry->sub_type = sub_type;
+	),
+
+	TP_printk(
+		"d_name=%s ctx=%u constraint_type=%s constraint_subtype=%s",
+		__get_str(device_name), __entry->id,
+		__print_symbolic(__entry->type, KGSL_CONSTRAINT_TYPES),
+		__print_symbolic(__entry->sub_type,
+		KGSL_CONSTRAINT_PWRLEVEL_SUBTYPES)
+	)
+);
+
+TRACE_EVENT(kgsl_constraint,
+
+	TP_PROTO(struct kgsl_device *device, unsigned int type,
+		unsigned int value, unsigned int on),
+
+	TP_ARGS(device, type, value, on),
+
+	TP_STRUCT__entry(
+		__string(device_name, device->name)
+		__field(unsigned int, type)
+		__field(unsigned int, value)
+		__field(unsigned int, on)
+	),
+
+	TP_fast_assign(
+		__assign_str(device_name, device->name);
+		__entry->type = type;
+		__entry->value = value;
+		__entry->on = on;
+	),
+
+	TP_printk(
+		"d_name=%s constraint_type=%s constraint_value=%u status=%s",
+		__get_str(device_name),
+		__print_symbolic(__entry->type, KGSL_CONSTRAINT_TYPES),
+		__entry->value,
+		__entry->on ? "ON" : "OFF"
+	)
+);
+
+TRACE_EVENT(kgsl_mmu_pagefault,
+
+	TP_PROTO(struct kgsl_device *device, unsigned int page,
+		 unsigned int pt, const char *op),
+
+	TP_ARGS(device, page, pt, op),
+
+	TP_STRUCT__entry(
+		__string(device_name, device->name)
+		__field(unsigned int, page)
+		__field(unsigned int, pt)
+		__string(op, op)
+	),
+
+	TP_fast_assign(
+		__assign_str(device_name, device->name);
+		__entry->page = page;
+		__entry->pt = pt;
+		__assign_str(op, op);
+	),
+
+	TP_printk(
+		"d_name=%s page=0x%08x pt=%u op=%s",
+		__get_str(device_name), __entry->page, __entry->pt,
+		__get_str(op)
+	)
+);
+
+TRACE_EVENT(kgsl_regwrite,
+
+	TP_PROTO(struct kgsl_device *device, unsigned int offset,
+		unsigned int value),
+
+	TP_ARGS(device, offset, value),
+
+	TP_STRUCT__entry(
+		__string(device_name, device->name)
+		__field(unsigned int, offset)
+		__field(unsigned int, value)
+	),
+
+	TP_fast_assign(
+		__assign_str(device_name, device->name);
+		__entry->offset = offset;
+		__entry->value = value;
+	),
+
+	TP_printk(
+		"d_name=%s reg=0x%x value=0x%x",
+		__get_str(device_name), __entry->offset, __entry->value
+	)
+);
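+
+/*
+ * Each TRACE_EVENT() in this file expands to a trace_<name>() helper;
+ * for example, a register write path can emit the event above with
+ * trace_kgsl_regwrite(device, offset, value).
+ */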
+
+TRACE_EVENT(kgsl_popp_level,
+
+	TP_PROTO(struct kgsl_device *device, int level1, int level2),
+
+	TP_ARGS(device, level1, level2),
+
+	TP_STRUCT__entry(
+		__string(device_name, device->name)
+		__field(int, level1)
+		__field(int, level2)
+	),
+
+	TP_fast_assign(
+		__assign_str(device_name, device->name);
+		__entry->level1 = level1;
+		__entry->level2 = level2;
+	),
+
+	TP_printk(
+		"d_name=%s old level=%d new level=%d",
+		__get_str(device_name), __entry->level1, __entry->level2)
+);
+
+TRACE_EVENT(kgsl_popp_mod,
+
+	TP_PROTO(struct kgsl_device *device, int x, int y),
+
+	TP_ARGS(device, x, y),
+
+	TP_STRUCT__entry(
+		__string(device_name, device->name)
+		__field(int, x)
+		__field(int, y)
+	),
+
+	TP_fast_assign(
+		__assign_str(device_name, device->name);
+		__entry->x = x;
+		__entry->y = y;
+	),
+
+	TP_printk(
+		"d_name=%s GPU busy mod=%d bus busy mod=%d",
+		__get_str(device_name), __entry->x, __entry->y)
+);
+
+TRACE_EVENT(kgsl_popp_nap,
+
+	TP_PROTO(struct kgsl_device *device, int t, int nap, int percent),
+
+	TP_ARGS(device, t, nap, percent),
+
+	TP_STRUCT__entry(
+		__string(device_name, device->name)
+		__field(int, t)
+		__field(int, nap)
+		__field(int, percent)
+	),
+
+	TP_fast_assign(
+		__assign_str(device_name, device->name);
+		__entry->t = t;
+		__entry->nap = nap;
+		__entry->percent = percent;
+	),
+
+	TP_printk(
+		"d_name=%s nap time=%d number of naps=%d percentage=%d",
+		__get_str(device_name), __entry->t, __entry->nap,
+			__entry->percent)
+);
+
+TRACE_EVENT(kgsl_register_event,
+		TP_PROTO(unsigned int id, unsigned int timestamp, void *func),
+		TP_ARGS(id, timestamp, func),
+		TP_STRUCT__entry(
+			__field(unsigned int, id)
+			__field(unsigned int, timestamp)
+			__field(void *, func)
+		),
+		TP_fast_assign(
+			__entry->id = id;
+			__entry->timestamp = timestamp;
+			__entry->func = func;
+		),
+		TP_printk(
+			"ctx=%u ts=%u cb=%pF",
+			__entry->id, __entry->timestamp, __entry->func)
+);
+
+TRACE_EVENT(kgsl_fire_event,
+		TP_PROTO(unsigned int id, unsigned int ts,
+			unsigned int type, unsigned int age, void *func),
+		TP_ARGS(id, ts, type, age, func),
+		TP_STRUCT__entry(
+			__field(unsigned int, id)
+			__field(unsigned int, ts)
+			__field(unsigned int, type)
+			__field(unsigned int, age)
+			__field(void *, func)
+		),
+		TP_fast_assign(
+			__entry->id = id;
+			__entry->ts = ts;
+			__entry->type = type;
+			__entry->age = age;
+			__entry->func = func;
+		),
+		TP_printk(
+			"ctx=%u ts=%u type=%s age=%u cb=%pF",
+			__entry->id, __entry->ts,
+			__print_symbolic(__entry->type, KGSL_EVENT_TYPES),
+			__entry->age, __entry->func)
+);
+
+TRACE_EVENT(kgsl_active_count,
+
+	TP_PROTO(struct kgsl_device *device, unsigned long ip),
+
+	TP_ARGS(device, ip),
+
+	TP_STRUCT__entry(
+		__string(device_name, device->name)
+		__field(unsigned int, count)
+		__field(unsigned long, ip)
+	),
+
+	TP_fast_assign(
+		__assign_str(device_name, device->name);
+		__entry->count = atomic_read(&device->active_cnt);
+		__entry->ip = ip;
+	),
+
+	TP_printk(
+		"d_name=%s active_cnt=%u func=%pf",
+		__get_str(device_name), __entry->count, (void *) __entry->ip
+	)
+);
+
+TRACE_EVENT(kgsl_pagetable_destroy,
+	TP_PROTO(u64 ptbase, unsigned int name),
+	TP_ARGS(ptbase, name),
+	TP_STRUCT__entry(
+		__field(u64, ptbase)
+		__field(unsigned int, name)
+	),
+	TP_fast_assign(
+		__entry->ptbase = ptbase;
+		__entry->name = name;
+	),
+	TP_printk("ptbase=%llx name=%u", __entry->ptbase, __entry->name)
+);
+
+DECLARE_EVENT_CLASS(syncpoint_timestamp_template,
+	TP_PROTO(struct kgsl_drawobj_sync *syncobj,
+		struct kgsl_context *context,
+		unsigned int timestamp),
+	TP_ARGS(syncobj, context, timestamp),
+	TP_STRUCT__entry(
+		__field(unsigned int, syncobj_context_id)
+		__field(unsigned int, context_id)
+		__field(unsigned int, timestamp)
+	),
+	TP_fast_assign(
+		__entry->syncobj_context_id = syncobj->base.context->id;
+		__entry->context_id = context->id;
+		__entry->timestamp = timestamp;
+	),
+	TP_printk("ctx=%d sync ctx=%d ts=%d",
+		__entry->syncobj_context_id, __entry->context_id,
+		__entry->timestamp)
+);
+
+DEFINE_EVENT(syncpoint_timestamp_template, syncpoint_timestamp,
+	TP_PROTO(struct kgsl_drawobj_sync *syncobj,
+		struct kgsl_context *context,
+		unsigned int timestamp),
+	TP_ARGS(syncobj, context, timestamp)
+);
+
+DEFINE_EVENT(syncpoint_timestamp_template, syncpoint_timestamp_expire,
+	TP_PROTO(struct kgsl_drawobj_sync *syncobj,
+		struct kgsl_context *context,
+		unsigned int timestamp),
+	TP_ARGS(syncobj, context, timestamp)
+);
+
+DECLARE_EVENT_CLASS(syncpoint_fence_template,
+	TP_PROTO(struct kgsl_drawobj_sync *syncobj, char *name),
+	TP_ARGS(syncobj, name),
+	TP_STRUCT__entry(
+		__string(fence_name, name)
+		__field(unsigned int, syncobj_context_id)
+	),
+	TP_fast_assign(
+		__entry->syncobj_context_id = syncobj->base.context->id;
+		__assign_str(fence_name, name);
+	),
+	TP_printk("ctx=%d fence=%s",
+		__entry->syncobj_context_id, __get_str(fence_name))
+);
+
+DEFINE_EVENT(syncpoint_fence_template, syncpoint_fence,
+	TP_PROTO(struct kgsl_drawobj_sync *syncobj, char *name),
+	TP_ARGS(syncobj, name)
+);
+
+DEFINE_EVENT(syncpoint_fence_template, syncpoint_fence_expire,
+	TP_PROTO(struct kgsl_drawobj_sync *syncobj, char *name),
+	TP_ARGS(syncobj, name)
+);
+
+TRACE_EVENT(kgsl_msg,
+	TP_PROTO(const char *msg),
+	TP_ARGS(msg),
+	TP_STRUCT__entry(
+		__string(msg, msg)
+	),
+	TP_fast_assign(
+		__assign_str(msg, msg);
+	),
+	TP_printk(
+		"%s", __get_str(msg)
+	)
+);
+
+DECLARE_EVENT_CLASS(sparse_alloc_template,
+	TP_PROTO(unsigned int id, uint64_t size, unsigned int pagesize),
+	TP_ARGS(id, size, pagesize),
+	TP_STRUCT__entry(
+		__field(unsigned int, id)
+		__field(uint64_t, size)
+		__field(unsigned int, pagesize)
+	),
+	TP_fast_assign(
+		__entry->id = id;
+		__entry->size = size;
+		__entry->pagesize = pagesize;
+	),
+	TP_printk("id=%d size=0x%llX pagesize=0x%X",
+		__entry->id, __entry->size, __entry->pagesize)
+);
+
+DEFINE_EVENT(sparse_alloc_template, sparse_phys_alloc,
+	TP_PROTO(unsigned int id, uint64_t size, unsigned int pagesize),
+	TP_ARGS(id, size, pagesize)
+);
+
+DEFINE_EVENT(sparse_alloc_template, sparse_virt_alloc,
+	TP_PROTO(unsigned int id, uint64_t size, unsigned int pagesize),
+	TP_ARGS(id, size, pagesize)
+);
+
+DECLARE_EVENT_CLASS(sparse_free_template,
+	TP_PROTO(unsigned int id),
+	TP_ARGS(id),
+	TP_STRUCT__entry(
+		__field(unsigned int, id)
+	),
+	TP_fast_assign(
+		__entry->id = id;
+	),
+	TP_printk("id=%d", __entry->id)
+);
+
+DEFINE_EVENT(sparse_free_template, sparse_phys_free,
+	TP_PROTO(unsigned int id),
+	TP_ARGS(id)
+);
+
+DEFINE_EVENT(sparse_free_template, sparse_virt_free,
+	TP_PROTO(unsigned int id),
+	TP_ARGS(id)
+);
+
+TRACE_EVENT(sparse_bind,
+	TP_PROTO(unsigned int v_id, uint64_t v_off,
+		unsigned int p_id, uint64_t p_off,
+		uint64_t size, uint64_t flags),
+	TP_ARGS(v_id, v_off, p_id, p_off, size, flags),
+	TP_STRUCT__entry(
+		__field(unsigned int, v_id)
+		__field(uint64_t, v_off)
+		__field(unsigned int, p_id)
+		__field(uint64_t, p_off)
+		__field(uint64_t, size)
+		__field(uint64_t, flags)
+	),
+	TP_fast_assign(
+		__entry->v_id = v_id;
+		__entry->v_off = v_off;
+		__entry->p_id = p_id;
+		__entry->p_off = p_off;
+		__entry->size = size;
+		__entry->flags = flags;
+	),
+	TP_printk(
+	"v_id=%d v_off=0x%llX p_id=%d p_off=0x%llX size=0x%llX flags=0x%llX",
+		__entry->v_id, __entry->v_off,
+		__entry->p_id, __entry->p_off,
+		__entry->size, __entry->flags)
+);
+
+TRACE_EVENT(sparse_unbind,
+	TP_PROTO(unsigned int v_id, uint64_t v_off, uint64_t size),
+	TP_ARGS(v_id, v_off, size),
+	TP_STRUCT__entry(
+		__field(unsigned int, v_id)
+		__field(uint64_t, v_off)
+		__field(uint64_t, size)
+	),
+	TP_fast_assign(
+		__entry->v_id = v_id;
+		__entry->v_off = v_off;
+		__entry->size = size;
+	),
+	TP_printk("v_id=%d v_off=0x%llX size=0x%llX",
+		__entry->v_id, __entry->v_off, __entry->size)
+);
+
+TRACE_EVENT(kgsl_clock_throttling,
+	TP_PROTO(
+		int idle_10pct,
+		int crc_50pct,
+		int crc_more50pct,
+		int crc_less50pct,
+		int adj
+	),
+	TP_ARGS(
+		idle_10pct,
+		crc_50pct,
+		crc_more50pct,
+		crc_less50pct,
+		adj
+	),
+	TP_STRUCT__entry(
+		__field(int, idle_10pct)
+		__field(int, crc_50pct)
+		__field(int, crc_more50pct)
+		__field(int, crc_less50pct)
+		__field(int, adj)
+	),
+	TP_fast_assign(
+		__entry->idle_10pct = idle_10pct;
+		__entry->crc_50pct = crc_50pct;
+		__entry->crc_more50pct = crc_more50pct;
+		__entry->crc_less50pct = crc_less50pct;
+		__entry->adj = adj;
+	),
+	TP_printk("idle_10=%d crc_50=%d crc_more50=%d crc_less50=%d adj=%d",
+		__entry->idle_10pct, __entry->crc_50pct, __entry->crc_more50pct,
+		__entry->crc_less50pct, __entry->adj
+	)
+);
+
+#endif /* _KGSL_TRACE_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/drivers/video/Kconfig b/drivers/video/Kconfig
index 00bc52b..0071517 100644
--- a/drivers/video/Kconfig
+++ b/drivers/video/Kconfig
@@ -17,6 +17,7 @@
 
 source "drivers/gpu/host1x/Kconfig"
 source "drivers/gpu/ipu-v3/Kconfig"
+source "drivers/gpu/msm/Kconfig"
 
 source "drivers/gpu/drm/Kconfig"