Merge "sched: Fix clear NOHZ_BALANCE_KICK"
diff --git a/Documentation/cpu-freq/governors.txt b/Documentation/cpu-freq/governors.txt
index b4ae5e6..92bbd16 100644
--- a/Documentation/cpu-freq/governors.txt
+++ b/Documentation/cpu-freq/governors.txt
@@ -234,7 +234,17 @@
 
 above_hispeed_delay: When speed is at or above hispeed_freq, wait for
 this long before raising speed in response to continued high load.
-Default is 20000 uS.
+The format is a single delay value, optionally followed by pairs of
+CPU speeds and the delay to use at or above those speeds.  Colons can
+be used between the speeds and associated delays for readability.  For
+example:
+
+   80000 1300000:200000 1500000:40000
+
+uses delay 80000 uS until CPU speed 1.3 GHz, at which speed delay
+200000 uS is used until speed 1.5 GHz, at which speed (and above)
+delay 40000 uS is used.  If speeds are specified, they must appear in
+ascending order.  Default is 20000 uS.
 
 timer_rate: Sample rate for reevaluating CPU load when the CPU is not
 idle.  A deferrable timer is used, such that the CPU will not be woken
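
The tuple syntax documented above is consumed by the freq_to_above_hispeed_delay()
helper added to drivers/cpufreq/cpufreq_interactive.c later in this patch.  A
minimal standalone sketch of that lookup, assuming the parsed array is laid out
as {delay, speed, delay, speed, delay, ...}:

	/* Sketch: pick the delay for a given CPU speed from a parsed
	 * above_hispeed_delay table, e.g. "80000 1300000:200000 1500000:40000"
	 * becomes {80000, 1300000, 200000, 1500000, 40000} (ntokens = 5).
	 */
	static unsigned int delay_for_speed(const unsigned int *tbl, int ntokens,
					    unsigned int speed)
	{
		int i;

		/* advance past each speed entry (odd index) we are at or above */
		for (i = 0; i < ntokens - 1 && speed >= tbl[i + 1]; i += 2)
			;
		return tbl[i];	/* delay in uS */
	}

For the example string, delay_for_speed(tbl, 5, 1400000) returns 200000 uS,
matching the description above.
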
diff --git a/Documentation/devicetree/bindings/arm/msm/msm_bus.txt b/Documentation/devicetree/bindings/arm/msm/msm_bus.txt
index fbf1a1f..6283a82 100644
--- a/Documentation/devicetree/bindings/arm/msm/msm_bus.txt
+++ b/Documentation/devicetree/bindings/arm/msm/msm_bus.txt
@@ -101,7 +101,7 @@
 			other parameters used in Limiter and Regular mode
 			for static BKE configuration. It is defined in KBps.
 qcom,bimc,gp:		Grant Period for configuring a master in limiter
-			mode. This is an integer value in micro-seconds.
+			mode. This is an integer value in nano-seconds.
 qcom,bimc,thmp:		Medium threshold percentage for BIMC masters.
 			This percentage is used to calculate medium threshold
 			value for BIMC Masters in Limiter mode for static
diff --git a/Documentation/devicetree/bindings/arm/msm/msm_ion.txt b/Documentation/devicetree/bindings/arm/msm/msm_ion.txt
index 2d83614..f3cf8f3 100644
--- a/Documentation/devicetree/bindings/arm/msm/msm_ion.txt
+++ b/Documentation/devicetree/bindings/arm/msm/msm_ion.txt
@@ -17,6 +17,16 @@
 Required properties for Ion heaps
 
 - reg: The ID of the ION heap.
+- qcom,ion-heap-type: The heap type to use for this heap. Should be one of
+  the following:
+    - "SYSTEM"
+    - "SYSTEM_CONTIG"
+    - "CARVEOUT"
+    - "CHUNK"
+    - "CP"
+    - "DMA"
+    - "SECURE_DMA"
+    - "REMOVED"
 
 Optional properties for Ion heaps
 
@@ -34,16 +44,17 @@
                  #address-cells = <1>;
                  #size-cells = <0>;
 
-                 qcom,ion-heap@30 { /* SYSTEM HEAP */
-                         reg = <30>;
+                 qcom,ion-heap@25 {
+                         reg = <25>;
+                         qcom,ion-heap-type = "SYSTEM";
                  };
 
                  qcom,ion-heap@8 { /* CP_MM HEAP */
                          compatible = "qcom,msm-ion-reserve";
                          reg = <8>;
                          qcom,heap-align = <0x1000>;
-                         qcom,memory-reservation-type = "EBI1"; /* reserve EBI memory */
-                         qcom,memory-reservation-size = <0x7800000>;
+                         linux,contiguous-region = <&secure_mem>;
+                         qcom,ion-heap-type = "SECURE_DMA";
                  };
 
                  qcom,ion-heap@29 { /* FIRMWARE HEAP */
@@ -53,5 +64,6 @@
                          qcom,heap-adjacent = <8>;
                          qcom,memory-reservation-type = "EBI1"; /* reserve EBI memory */
                          qcom,memory-reservation-size = <0xA00000>;
-
+                         qcom,ion-heap-type = "CARVEOUT";
+                 };
 	};
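
The qcom,ion-heap-type strings above are matched against a fixed table in the
MSM ION platform code to pick the heap implementation for each reg ID.  A rough
sketch of that string-to-type matching, assuming ION_HEAP_TYPE_* constants along
the lines of those in include/linux/ion.h (the CP, SECURE_DMA and REMOVED types
are MSM-specific additions and are omitted here):

	/* Illustrative only -- not the driver's actual parser.  Maps the
	 * qcom,ion-heap-type DT string to a heap type constant; the real
	 * table and the MSM-specific types live in the msm_ion code.
	 */
	static const struct {
		const char *name;
		int type;
	} heap_types[] = {
		{ "SYSTEM",        ION_HEAP_TYPE_SYSTEM },
		{ "SYSTEM_CONTIG", ION_HEAP_TYPE_SYSTEM_CONTIG },
		{ "CARVEOUT",      ION_HEAP_TYPE_CARVEOUT },
		{ "CHUNK",         ION_HEAP_TYPE_CHUNK },
		{ "DMA",           ION_HEAP_TYPE_DMA },
	};

	static int heap_type_from_string(const char *name)
	{
		int i;

		for (i = 0; i < ARRAY_SIZE(heap_types); i++)
			if (!strcmp(name, heap_types[i].name))
				return heap_types[i].type;
		return -EINVAL;
	}
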
diff --git a/arch/arm/boot/dts/apq8074-v1-ion.dtsi b/arch/arm/boot/dts/apq8074-v1-ion.dtsi
index 49d7ee1..3611132 100644
--- a/arch/arm/boot/dts/apq8074-v1-ion.dtsi
+++ b/arch/arm/boot/dts/apq8074-v1-ion.dtsi
@@ -17,6 +17,7 @@
 			reg = <23>;
 			qcom,heap-align = <0x1000>;
 			qcom,memory-fixed = <0x0dc00000 0x1e00000>;
+			qcom,ion-heap-type = "CARVEOUT";
 		};
 	};
 };
diff --git a/arch/arm/boot/dts/apq8074-v2.0-1-ion.dtsi b/arch/arm/boot/dts/apq8074-v2.0-1-ion.dtsi
index 49d7ee1..3611132 100644
--- a/arch/arm/boot/dts/apq8074-v2.0-1-ion.dtsi
+++ b/arch/arm/boot/dts/apq8074-v2.0-1-ion.dtsi
@@ -17,6 +17,7 @@
 			reg = <23>;
 			qcom,heap-align = <0x1000>;
 			qcom,memory-fixed = <0x0dc00000 0x1e00000>;
+			qcom,ion-heap-type = "CARVEOUT";
 		};
 	};
 };
diff --git a/arch/arm/boot/dts/apq8074-v2.2-ion.dtsi b/arch/arm/boot/dts/apq8074-v2.2-ion.dtsi
index 49d7ee1..3611132 100644
--- a/arch/arm/boot/dts/apq8074-v2.2-ion.dtsi
+++ b/arch/arm/boot/dts/apq8074-v2.2-ion.dtsi
@@ -17,6 +17,7 @@
 			reg = <23>;
 			qcom,heap-align = <0x1000>;
 			qcom,memory-fixed = <0x0dc00000 0x1e00000>;
+			qcom,ion-heap-type = "CARVEOUT";
 		};
 	};
 };
diff --git a/arch/arm/boot/dts/apq8084-ion.dtsi b/arch/arm/boot/dts/apq8084-ion.dtsi
index ea954b8..167b8b7 100644
--- a/arch/arm/boot/dts/apq8084-ion.dtsi
+++ b/arch/arm/boot/dts/apq8084-ion.dtsi
@@ -16,16 +16,14 @@
 		#address-cells = <1>;
 		#size-cells = <0>;
 
-		qcom,ion-heap@30 { /* SYSTEM HEAP */
-			reg = <30>;
-		};
-
-		qcom,ion-heap@21 { /* SYSTEM CONTIG HEAP */
-			reg = <21>;
-		};
-
-		qcom,ion-heap@25 { /* IOMMU HEAP */
+		qcom,ion-heap@25 {
 			reg = <25>;
+			qcom,ion-heap-type = "SYSTEM";
+		};
+
+		qcom,ion-heap@21 {
+			reg = <21>;
+			qcom,ion-heap-type = "SYSTEM_CONTIG";
 		};
 	};
 };
diff --git a/arch/arm/boot/dts/fsm9900.dtsi b/arch/arm/boot/dts/fsm9900.dtsi
index 1c48bf0..705a512 100644
--- a/arch/arm/boot/dts/fsm9900.dtsi
+++ b/arch/arm/boot/dts/fsm9900.dtsi
@@ -85,8 +85,9 @@
 		#address-cells = <1>;
 		#size-cells = <0>;
 
-		qcom,ion-heap@30 { /* SYSTEM HEAP */
-			reg = <30>;
+		qcom,ion-heap@25 {
+			reg = <25>;
+			qcom,ion-heap-type = "SYSTEM";
 		};
 	};
 
diff --git a/arch/arm/boot/dts/mpq8092-ion.dtsi b/arch/arm/boot/dts/mpq8092-ion.dtsi
index f9f5985..903610d 100644
--- a/arch/arm/boot/dts/mpq8092-ion.dtsi
+++ b/arch/arm/boot/dts/mpq8092-ion.dtsi
@@ -16,16 +16,14 @@
 		#address-cells = <1>;
 		#size-cells = <0>;
 
-		qcom,ion-heap@30 { /* SYSTEM HEAP */
-			reg = <30>;
-		};
-
-		qcom,ion-heap@21 { /* SYSTEM CONTIG HEAP */
-			reg = <21>;
-		};
-
-		qcom,ion-heap@25 { /* IOMMU HEAP */
+		qcom,ion-heap@25 {
 			reg = <25>;
+			qcom,ion-heap-type = "SYSTEM";
+		};
+
+		qcom,ion-heap@21 {
+			reg = <21>;
+			qcom,ion-heap-type = "SYSTEM_CONTIG";
 		};
 
 	};
diff --git a/arch/arm/boot/dts/msm8226-ion.dtsi b/arch/arm/boot/dts/msm8226-ion.dtsi
index 30c3209..06e2779 100644
--- a/arch/arm/boot/dts/msm8226-ion.dtsi
+++ b/arch/arm/boot/dts/msm8226-ion.dtsi
@@ -16,12 +16,14 @@
 		#address-cells = <1>;
 		#size-cells = <0>;
 
-		qcom,ion-heap@30 { /* SYSTEM HEAP */
-			reg = <30>;
+		qcom,ion-heap@25 {
+			reg = <25>;
+			qcom,ion-heap-type = "SYSTEM";
 		};
 
-		qcom,ion-heap@21 { /* SYSTEM CONTIG HEAP */
+		qcom,ion-heap@21 {
 			reg = <21>;
+			qcom,ion-heap-type = "SYSTEM_CONTIG";
 		};
 
 		qcom,ion-heap@8 { /* CP_MM HEAP */
@@ -29,10 +31,7 @@
 			reg = <8>;
 			qcom,heap-align = <0x1000>;
 			linux,contiguous-region = <&secure_mem>;
-		};
-
-		qcom,ion-heap@25 { /* IOMMU HEAP */
-			reg = <25>;
+			qcom,ion-heap-type = "SECURE_DMA";
 		};
 
 		qcom,ion-heap@22 { /* adsp heap */
@@ -40,12 +39,14 @@
 			reg = <22>;
 			qcom,heap-align = <0x1000>;
 			linux,contiguous-region = <&adsp_mem>;
+			qcom,ion-heap-type = "DMA";
 		};
 
 		qcom,ion-heap@27 { /* QSECOM HEAP */
 			compatible = "qcom,msm-ion-reserve";
 			reg = <27>;
 			linux,contiguous-region = <&qsecom_mem>;
+			qcom,ion-heap-type = "DMA";
 		};
 
 		qcom,ion-heap@28 { /* AUDIO HEAP */
@@ -54,6 +55,7 @@
 			qcom,heap-align = <0x1000>;
 			qcom,memory-reservation-type = "EBI1"; /* reserve EBI memory */
 			qcom,memory-reservation-size = <0x314000>;
+			qcom,ion-heap-type = "CARVEOUT";
 		};
 
 		qcom,ion-heap@23 { /* OTHER PIL HEAP */
@@ -61,6 +63,7 @@
 			reg = <23>;
 			qcom,heap-align = <0x1000>;
 			qcom,memory-fixed = <0x0dc00000 0x1900000>;
+			qcom,ion-heap-type = "CARVEOUT";
 		};
 
 	};
diff --git a/arch/arm/boot/dts/msm8610-bus.dtsi b/arch/arm/boot/dts/msm8610-bus.dtsi
index c6e81d8..54c698c 100644
--- a/arch/arm/boot/dts/msm8610-bus.dtsi
+++ b/arch/arm/boot/dts/msm8610-bus.dtsi
@@ -941,7 +941,7 @@
 			qcom,thresh = <800000>;
 			qcom,dual-conf;
 			qcom,bimc,bw = <300000>;
-			qcom,bimc,gp = <5>;
+			qcom,bimc,gp = <5000>;
 			qcom,bimc,thmp = <50>;
 		};
 
diff --git a/arch/arm/boot/dts/msm8610-ion.dtsi b/arch/arm/boot/dts/msm8610-ion.dtsi
index 77cd582..601f8ed 100644
--- a/arch/arm/boot/dts/msm8610-ion.dtsi
+++ b/arch/arm/boot/dts/msm8610-ion.dtsi
@@ -16,22 +16,21 @@
 		#address-cells = <1>;
 		#size-cells = <0>;
 
-		qcom,ion-heap@30 { /* SYSTEM HEAP */
-			reg = <30>;
-		};
-
-		qcom,ion-heap@21 { /* SYSTEM CONTIG HEAP */
-			reg = <21>;
-		};
-
-		qcom,ion-heap@25 { /* IOMMU HEAP */
+		qcom,ion-heap@25 {
 			reg = <25>;
+			qcom,ion-heap-type = "SYSTEM";
+		};
+
+		qcom,ion-heap@21 {
+			reg = <21>;
+			qcom,ion-heap-type = "SYSTEM_CONTIG";
 		};
 
 		qcom,ion-heap@27 { /* QSECOM HEAP */
 			compatible = "qcom,msm-ion-reserve";
 			reg = <27>;
 			linux,contiguous-region = <&qsecom_mem>;
+			qcom,ion-heap-type = "DMA";
 		};
 
 		qcom,ion-heap@23 { /* OTHER PIL HEAP */
@@ -39,6 +38,7 @@
 			reg = <23>;
 			qcom,heap-align = <0x1000>;
 			qcom,memory-fixed = <0x0c500000 0x1300000>;
+			qcom,ion-heap-type = "CARVEOUT";
 		};
 
 		qcom,ion-heap@26 { /* MODEM HEAP */
@@ -46,6 +46,7 @@
 			reg = <26>;
 			qcom,heap-align = <0x1000>;
 			qcom,memory-fixed = <0x08800000 0x3d00000>;
+			qcom,ion-heap-type = "CARVEOUT";
 		};
 
 	};
diff --git a/arch/arm/boot/dts/msm8974-bus.dtsi b/arch/arm/boot/dts/msm8974-bus.dtsi
index 609a1b3..af51327 100644
--- a/arch/arm/boot/dts/msm8974-bus.dtsi
+++ b/arch/arm/boot/dts/msm8974-bus.dtsi
@@ -1168,18 +1168,12 @@
 			qcom,masterp = <0>;
 			qcom,tier = <2>;
 			qcom,hw-sel = "BIMC";
-			qcom,mode = "Limiter";
+			qcom,mode = "Fixed";
 			qcom,qport = <0>;
 			qcom,ws = <10000>;
 			qcom,mas-hw-id = <0>;
 			qcom,prio-rd = <0>;
 			qcom,prio-wr = <0>;
-			qcom,mode-thresh = "Fixed";
-			qcom,thresh = <2000000>;
-			qcom,dual-conf;
-			qcom,bimc,bw = <300000>;
-			qcom,bimc,gp = <5>;
-			qcom,bimc,thmp = <50>;
 		};
 
 		mas-ampss-m1 {
@@ -1188,18 +1182,12 @@
 			qcom,masterp = <1>;
 			qcom,tier = <2>;
 			qcom,hw-sel = "BIMC";
-			qcom,mode = "Limiter";
+			qcom,mode = "Fixed";
 			qcom,qport = <1>;
 			qcom,ws = <10000>;
 			qcom,mas-hw-id = <0>;
 			qcom,prio-rd = <0>;
 			qcom,prio-wr = <0>;
-			qcom,mode-thresh = "Fixed";
-			qcom,thresh = <2000000>;
-			qcom,dual-conf;
-			qcom,bimc,bw = <300000>;
-			qcom,bimc,gp = <5>;
-			qcom,bimc,thmp = <50>;
 		};
 
 		mas-mss-proc {
diff --git a/arch/arm/boot/dts/msm8974-ion.dtsi b/arch/arm/boot/dts/msm8974-ion.dtsi
index 455ed2d..5829f05 100644
--- a/arch/arm/boot/dts/msm8974-ion.dtsi
+++ b/arch/arm/boot/dts/msm8974-ion.dtsi
@@ -16,12 +16,14 @@
 		#address-cells = <1>;
 		#size-cells = <0>;
 
-		qcom,ion-heap@30 { /* SYSTEM HEAP */
-			reg = <30>;
+		qcom,ion-heap@25 {
+			reg = <25>;
+			qcom,ion-heap-type = "SYSTEM";
 		};
 
-		qcom,ion-heap@21 { /* SYSTEM CONTIG HEAP */
+		qcom,ion-heap@21 {
 			reg = <21>;
+			qcom,ion-heap-type = "SYSTEM_CONTIG";
 		};
 
 		qcom,ion-heap@8 { /* CP_MM HEAP */
@@ -29,6 +31,7 @@
 			reg = <8>;
 			qcom,heap-align = <0x1000>;
 			linux,contiguous-region = <&secure_mem>;
+			qcom,ion-heap-type = "SECURE_DMA";
 		};
 
 		qcom,ion-heap@22 { /* adsp heap */
@@ -36,16 +39,14 @@
 			reg = <22>;
 			qcom,heap-align = <0x1000>;
 			linux,contiguous-region = <&adsp_mem>;
-		};
-
-		qcom,ion-heap@25 { /* IOMMU HEAP */
-			reg = <25>;
+			qcom,ion-heap-type = "DMA";
 		};
 
 		qcom,ion-heap@27 { /* QSECOM HEAP */
 			compatible = "qcom,msm-ion-reserve";
 			reg = <27>;
 			linux,contiguous-region = <&qsecom_mem>;
+			qcom,ion-heap-type = "DMA";
 		};
 
 		qcom,ion-heap@28 { /* AUDIO HEAP */
@@ -54,6 +55,7 @@
 			qcom,heap-align = <0x1000>;
 			qcom,memory-reservation-type = "EBI1"; /* reserve EBI memory */
 			qcom,memory-reservation-size = <0x614000>;
+			qcom,ion-heap-type = "CARVEOUT";
 		};
 
 		qcom,ion-heap@23 { /* OTHER PIL HEAP */
@@ -61,6 +63,7 @@
 			reg = <23>;
 			qcom,heap-align = <0x1000>;
 			qcom,memory-fixed = <0x05d00000 0x1e00000>;
+			qcom,ion-heap-type = "CARVEOUT";
 		};
 	};
 };
diff --git a/arch/arm/boot/dts/msm8974pro-ion.dtsi b/arch/arm/boot/dts/msm8974pro-ion.dtsi
index 4c427bf..3bb885a 100644
--- a/arch/arm/boot/dts/msm8974pro-ion.dtsi
+++ b/arch/arm/boot/dts/msm8974pro-ion.dtsi
@@ -18,6 +18,7 @@
 			reg = <23>;
 			qcom,heap-align = <0x1000>;
 			qcom,memory-fixed = <0x05a00000 0x2100000>;
+			qcom,ion-heap-type = "CARVEOUT";
 		};
 
 		qcom,ion-heap@26 { /* MODEM HEAP */
@@ -25,6 +26,7 @@
 			reg = <26>;
 			qcom,heap-align = <0x1000>;
 			qcom,memory-fixed = <0x08000000 0x5000000>;
+			qcom,ion-heap-type = "CARVEOUT";
 		};
 	};
 };
diff --git a/arch/arm/boot/dts/msm8974pro.dtsi b/arch/arm/boot/dts/msm8974pro.dtsi
index 85c2fe3..6b53562 100644
--- a/arch/arm/boot/dts/msm8974pro.dtsi
+++ b/arch/arm/boot/dts/msm8974pro.dtsi
@@ -1550,21 +1550,21 @@
 
 	qcom,msm-cpufreq@0 {
 		qcom,cpufreq-table =
-			<  300000  300000  600 /*  75 MHz */ >,
-			<  422400  422400 1200 /* 150 MHz */ >,
-			<  652800  499200 1600 /* 200 MHz */ >,
-			<  729600  576000 2456 /* 307 MHz */ >,
-			<  883200  576000 2456 /* 307 MHz */ >,
-			<  960000  960000 3680 /* 460 MHz */ >,
-			< 1036800 1036800 3680 /* 460 MHz */ >,
-			< 1190400 1036800 3680 /* 460 MHz */ >,
-			< 1267200 1267200 4912 /* 614 MHz */ >,
-			< 1497600 1497600 4912 /* 614 MHz */ >,
-			< 1574400 1574400 6400 /* 800 MHz */ >,
-			< 1728000 1651200 6400 /* 800 MHz */ >,
-			< 1958400 1728000 7448 /* 931 MHz */ >,
-			< 2265600 1728000 7448 /* 931 MHz */ >,
-			< 2457600 1728000 7448 /* 931 MHz */ >;
+			<  300000  300000 300 /* 37.5 MHz */ >,
+			<  422400  422400 300 /* 37.5 MHz */ >,
+			<  652800  499200 300 /* 37.5 MHz */ >,
+			<  729600  576000 300 /* 37.5 MHz */ >,
+			<  883200  576000 300 /* 37.5 MHz */ >,
+			<  960000  960000 300 /* 37.5 MHz */ >,
+			< 1036800 1036800 300 /* 37.5 MHz */ >,
+			< 1190400 1036800 300 /* 37.5 MHz */ >,
+			< 1267200 1267200 300 /* 37.5 MHz */ >,
+			< 1497600 1497600 300 /* 37.5 MHz */ >,
+			< 1574400 1574400 300 /* 37.5 MHz */ >,
+			< 1728000 1651200 300 /* 37.5 MHz */ >,
+			< 1958400 1728000 300 /* 37.5 MHz */ >,
+			< 2265600 1728000 300 /* 37.5 MHz */ >,
+			< 2496000 1728000 300 /* 37.5 MHz */ >;
 	};
 };
 
diff --git a/arch/arm/boot/dts/msm9625-ion.dtsi b/arch/arm/boot/dts/msm9625-ion.dtsi
index 2a3e4b5..3ef0b3f 100644
--- a/arch/arm/boot/dts/msm9625-ion.dtsi
+++ b/arch/arm/boot/dts/msm9625-ion.dtsi
@@ -16,12 +16,9 @@
 		#address-cells = <1>;
 		#size-cells = <0>;
 
-		qcom,ion-heap@30 { /* SYSTEM HEAP */
-			reg = <30>;
-		};
-
-		qcom,ion-heap@25 { /* IOMMU HEAP */
+		qcom,ion-heap@25 {
 			reg = <25>;
+			qcom,ion-heap-type = "SYSTEM";
 		};
 
 		qcom,ion-heap@28 { /* AUDIO HEAP */
@@ -30,6 +27,7 @@
 			qcom,heap-align = <0x1000>;
 			qcom,memory-reservation-type = "EBI1"; /* reserve EBI memory */
 			qcom,memory-reservation-size = <0xAF000>;
+			qcom,ion-heap-type = "CARVEOUT";
 		};
 	};
 };
diff --git a/arch/arm/boot/dts/msmsamarium-ion.dtsi b/arch/arm/boot/dts/msmsamarium-ion.dtsi
index ea954b8..167b8b7 100644
--- a/arch/arm/boot/dts/msmsamarium-ion.dtsi
+++ b/arch/arm/boot/dts/msmsamarium-ion.dtsi
@@ -16,16 +16,14 @@
 		#address-cells = <1>;
 		#size-cells = <0>;
 
-		qcom,ion-heap@30 { /* SYSTEM HEAP */
-			reg = <30>;
-		};
-
-		qcom,ion-heap@21 { /* SYSTEM CONTIG HEAP */
-			reg = <21>;
-		};
-
-		qcom,ion-heap@25 { /* IOMMU HEAP */
+		qcom,ion-heap@25 {
 			reg = <25>;
+			qcom,ion-heap-type = "SYSTEM";
+		};
+
+		qcom,ion-heap@21 {
+			reg = <21>;
+			qcom,ion-heap-type = "SYSTEM_CONTIG";
 		};
 	};
 };
diff --git a/arch/arm/mach-msm/Makefile b/arch/arm/mach-msm/Makefile
index eacdcdf..3079b64 100644
--- a/arch/arm/mach-msm/Makefile
+++ b/arch/arm/mach-msm/Makefile
@@ -431,3 +431,5 @@
 obj-$(CONFIG_WALL_CLK) += wallclk.o
 obj-$(CONFIG_WALL_CLK_SYSFS) += wallclk_sysfs.o
 obj-$(CONFIG_ARCH_RANDOM) += early_random.o
+obj-$(CONFIG_PERFMAP) += perfmap.o
+obj-$(CONFIG_ARCH_MSM8974) += cpubw-krait.o
diff --git a/arch/arm/mach-msm/cpubw-krait.c b/arch/arm/mach-msm/cpubw-krait.c
new file mode 100644
index 0000000..4108754
--- /dev/null
+++ b/arch/arm/mach-msm/cpubw-krait.c
@@ -0,0 +1,472 @@
+/*
+ * Copyright (c) 2013, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) "cpubw-krait: " fmt
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/io.h>
+#include <linux/delay.h>
+#include <linux/ktime.h>
+#include <linux/time.h>
+#include <linux/err.h>
+#include <linux/errno.h>
+#include <linux/mutex.h>
+#include <linux/interrupt.h>
+#include <trace/events/power.h>
+#include <mach/msm_bus.h>
+#include <mach/msm_bus_board.h>
+
+#include <mach/msm-krait-l2-accessors.h>
+
+#define L2PMRESR2		0x412
+#define L2PMCR			0x400
+#define L2PMCNTENCLR		0x402
+#define L2PMCNTENSET		0x403
+#define L2PMINTENCLR		0x404
+#define L2PMINTENSET		0x405
+#define L2PMOVSR		0x406
+#define L2PMOVSSET		0x407
+#define L2PMnEVCNTCR(n)		(0x420 + n * 0x10)
+#define L2PMnEVCNTR(n)		(0x421 + n * 0x10)
+#define L2PMnEVCNTSR(n)		(0x422 + n * 0x10)
+#define L2PMnEVFILTER(n)	(0x423 + n * 0x10)
+#define L2PMnEVTYPER(n)		(0x424 + n * 0x10)
+#define MON_INT			33
+
+#define MBYTE			(1 << 20)
+
+#define BW(_bw) \
+	{ \
+		.vectors = (struct msm_bus_vectors[]){ \
+			{\
+				.src = MSM_BUS_MASTER_AMPSS_M0, \
+				.dst = MSM_BUS_SLAVE_EBI_CH0, \
+			}, \
+			{ \
+				.src = MSM_BUS_MASTER_AMPSS_M1, \
+				.dst = MSM_BUS_SLAVE_EBI_CH0, \
+			}, \
+		}, \
+		.num_paths = 2, \
+	}
+
+/* Has to be a power of 2 to work correctly */
+static unsigned int bytes_per_beat = 8;
+module_param(bytes_per_beat, uint, 0644);
+
+static unsigned int sample_ms = 50;
+module_param(sample_ms, uint, 0644);
+
+static unsigned int tolerance_percent = 10;
+module_param(tolerance_percent, uint, 0644);
+
+static unsigned int guard_band_mbps = 100;
+module_param(guard_band_mbps, uint, 0644);
+
+static unsigned int decay_rate = 90;
+module_param(decay_rate, uint, 0644);
+
+static unsigned int io_percent = 15;
+module_param(io_percent, uint, 0644);
+
+static unsigned int bw_step = 200;
+module_param(bw_step, uint, 0644);
+
+static struct kernel_param_ops enable_ops;
+static bool enable;
+module_param_cb(enable, &enable_ops, &enable, S_IRUGO | S_IWUSR);
+
+static void mon_init(void)
+{
+	/* Set up counters 0/1 to count write/read beats */
+	set_l2_indirect_reg(L2PMRESR2, 0x8B0B0000);
+	set_l2_indirect_reg(L2PMnEVCNTCR(0), 0x0);
+	set_l2_indirect_reg(L2PMnEVCNTCR(1), 0x0);
+	set_l2_indirect_reg(L2PMnEVCNTR(0), 0xFFFFFFFF);
+	set_l2_indirect_reg(L2PMnEVCNTR(1), 0xFFFFFFFF);
+	set_l2_indirect_reg(L2PMnEVFILTER(0), 0xF003F);
+	set_l2_indirect_reg(L2PMnEVFILTER(1), 0xF003F);
+	set_l2_indirect_reg(L2PMnEVTYPER(0), 0xA);
+	set_l2_indirect_reg(L2PMnEVTYPER(1), 0xB);
+}
+
+static void global_mon_enable(bool en)
+{
+	u32 regval;
+
+	/* Global counter enable */
+	regval = get_l2_indirect_reg(L2PMCR);
+	if (en)
+		regval |= BIT(0);
+	else
+		regval &= ~BIT(0);
+	set_l2_indirect_reg(L2PMCR, regval);
+}
+
+static void mon_enable(int n)
+{
+	/* Clear previous overflow state for event counter n */
+	set_l2_indirect_reg(L2PMOVSR, BIT(n));
+
+	/* Enable event counter n */
+	set_l2_indirect_reg(L2PMCNTENSET, BIT(n));
+}
+
+static void mon_disable(int n)
+{
+	/* Disable event counter n */
+	set_l2_indirect_reg(L2PMCNTENCLR, BIT(n));
+}
+
+/* Returns start counter value to be used with mon_get_mbps() */
+static u32 mon_set_limit_mbyte(int n, unsigned int mbytes)
+{
+	u32 regval, beats;
+
+	beats = mult_frac(mbytes, MBYTE, bytes_per_beat);
+	regval = 0xFFFFFFFF - beats;
+	set_l2_indirect_reg(L2PMnEVCNTR(n), regval);
+	pr_debug("EV%d MB: %d, start val: %x\n", n, mbytes, regval);
+
+	return regval;
+}
+
+/* Returns MBps of read/writes for the sampling window. */
+static int mon_get_mbps(int n, u32 start_val, unsigned int us)
+{
+	u32 overflow, count;
+	long long beats;
+
+	count = get_l2_indirect_reg(L2PMnEVCNTR(n));
+	overflow = get_l2_indirect_reg(L2PMOVSR);
+
+	if (overflow & BIT(n))
+		beats = 0xFFFFFFFF - start_val + count;
+	else
+		beats = count - start_val;
+
+	beats *= USEC_PER_SEC;
+	beats *= bytes_per_beat;
+	do_div(beats, us);
+	beats = DIV_ROUND_UP_ULL(beats, MBYTE);
+
+	pr_debug("EV%d ov: %x, cnt: %x\n", n, overflow, count);
+
+	return beats;
+}
+
+static void do_bw_sample(struct work_struct *work);
+static DECLARE_DEFERRED_WORK(bw_sample, do_bw_sample);
+static struct workqueue_struct *bw_sample_wq;
+
+static DEFINE_MUTEX(bw_lock);
+static ktime_t prev_ts;
+static u32 prev_r_start_val;
+static u32 prev_w_start_val;
+
+static struct msm_bus_paths bw_levels[] = {
+	BW(0), BW(200),
+};
+static struct msm_bus_scale_pdata bw_data = {
+	.usecase = bw_levels,
+	.num_usecases = ARRAY_SIZE(bw_levels),
+	.name = "cpubw-krait",
+	.active_only = 1,
+};
+static u32 bus_client;
+static void compute_bw(int mbps);
+static irqreturn_t mon_intr_handler(int irq, void *dev_id);
+
+#define START_LIMIT	100 /* MBps */
+static int start_monitoring(void)
+{
+	int mb_limit;
+	int ret;
+
+	ret = request_threaded_irq(MON_INT, NULL, mon_intr_handler,
+			  IRQF_ONESHOT | IRQF_SHARED | IRQF_TRIGGER_RISING,
+			  "cpubw_krait", mon_intr_handler);
+	if (ret) {
+		pr_err("Unable to register interrupt handler\n");
+		return ret;
+	}
+
+	bw_sample_wq = alloc_workqueue("cpubw-krait", WQ_HIGHPRI, 0);
+	if (!bw_sample_wq) {
+		pr_err("Unable to alloc workqueue\n");
+		ret = -ENOMEM;
+		goto alloc_wq_fail;
+	}
+
+	bus_client = msm_bus_scale_register_client(&bw_data);
+	if (!bus_client) {
+		pr_err("Unable to register bus client\n");
+		ret = -ENODEV;
+		goto bus_reg_fail;
+	}
+
+	compute_bw(START_LIMIT);
+
+	mon_init();
+	mon_disable(0);
+	mon_disable(1);
+
+	mb_limit = mult_frac(START_LIMIT, sample_ms, MSEC_PER_SEC);
+	mb_limit /= 2;
+
+	prev_r_start_val = mon_set_limit_mbyte(0, mb_limit);
+	prev_w_start_val = mon_set_limit_mbyte(1, mb_limit);
+
+	prev_ts = ktime_get();
+
+	set_l2_indirect_reg(L2PMINTENSET, BIT(0));
+	set_l2_indirect_reg(L2PMINTENSET, BIT(1));
+	mon_enable(0);
+	mon_enable(1);
+	global_mon_enable(true);
+
+	queue_delayed_work(bw_sample_wq, &bw_sample,
+				msecs_to_jiffies(sample_ms));
+
+	return 0;
+
+bus_reg_fail:
+	destroy_workqueue(bw_sample_wq);
+alloc_wq_fail:
+	disable_irq(MON_INT);
+	free_irq(MON_INT, mon_intr_handler);
+	return ret;
+}
+
+static void stop_monitoring(void)
+{
+	global_mon_enable(false);
+	mon_disable(0);
+	mon_disable(1);
+	set_l2_indirect_reg(L2PMINTENCLR, BIT(0));
+	set_l2_indirect_reg(L2PMINTENCLR, BIT(1));
+
+	disable_irq(MON_INT);
+	free_irq(MON_INT, mon_intr_handler);
+
+	cancel_delayed_work_sync(&bw_sample);
+	destroy_workqueue(bw_sample_wq);
+
+	bw_levels[0].vectors[0].ib = 0;
+	bw_levels[0].vectors[0].ab = 0;
+	bw_levels[0].vectors[1].ib = 0;
+	bw_levels[0].vectors[1].ab = 0;
+
+	bw_levels[1].vectors[0].ib = 0;
+	bw_levels[1].vectors[0].ab = 0;
+	bw_levels[1].vectors[1].ib = 0;
+	bw_levels[1].vectors[1].ab = 0;
+	msm_bus_scale_unregister_client(bus_client);
+}
+
+static void set_bw(int mbps)
+{
+	static int cur_idx, cur_ab, cur_ib;
+	int new_ab, new_ib;
+	int i, ret;
+
+	if (!io_percent)
+		io_percent = 1;
+	new_ab = roundup(mbps, bw_step);
+	new_ib = mbps * 100 / io_percent;
+	new_ib = roundup(new_ib, bw_step);
+
+	if (cur_ib == new_ib && cur_ab == new_ab)
+		return;
+
+	i = (cur_idx + 1) % ARRAY_SIZE(bw_levels);
+
+	bw_levels[i].vectors[0].ib = new_ib * 1000000ULL;
+	bw_levels[i].vectors[0].ab = new_ab * 1000000ULL;
+	bw_levels[i].vectors[1].ib = new_ib * 1000000ULL;
+	bw_levels[i].vectors[1].ab = new_ab * 1000000ULL;
+
+	pr_debug("BW MBps: Req: %d AB: %d IB: %d\n", mbps, new_ab, new_ib);
+
+	ret = msm_bus_scale_client_update_request(bus_client, i);
+	if (ret)
+		pr_err("bandwidth request failed (%d)\n", ret);
+	else {
+		cur_idx = i;
+		cur_ib = new_ib;
+		cur_ab = new_ab;
+	}
+}
+
+static void compute_bw(int mbps)
+{
+	static int cur_bw;
+	int new_bw;
+
+	mbps += guard_band_mbps;
+
+	if (mbps > cur_bw) {
+		new_bw = mbps;
+	} else {
+		new_bw = mbps * decay_rate + cur_bw * (100 - decay_rate);
+		new_bw /= 100;
+	}
+
+	if (new_bw == cur_bw)
+		return;
+
+	set_bw(new_bw);
+	cur_bw = new_bw;
+}
+
+static int to_limit(int mbps)
+{
+	mbps *= (100 + tolerance_percent) * sample_ms;
+	mbps /= 100;
+	mbps = DIV_ROUND_UP(mbps, MSEC_PER_SEC);
+	return mbps;
+}
+
+static void measure_bw(void)
+{
+	int r_mbps, w_mbps, mbps;
+	ktime_t ts;
+	unsigned int us;
+
+	mutex_lock(&bw_lock);
+
+	/*
+	 * Since we are stopping the counters, we don't want this short work
+	 * to be interrupted by other tasks and cause the measurements to be
+	 * wrong. Not blocking interrupts to avoid affecting interrupt
+	 * latency and since they should be short anyway because they run in
+	 * atomic context.
+	 */
+	preempt_disable();
+
+	ts = ktime_get();
+	us = ktime_to_us(ktime_sub(ts, prev_ts));
+	if (!us)
+		us = 1;
+
+	mon_disable(0);
+	mon_disable(1);
+
+	r_mbps = mon_get_mbps(0, prev_r_start_val, us);
+	w_mbps = mon_get_mbps(1, prev_w_start_val, us);
+
+	prev_r_start_val = mon_set_limit_mbyte(0, to_limit(r_mbps));
+	prev_w_start_val = mon_set_limit_mbyte(1, to_limit(w_mbps));
+
+	mon_enable(0);
+	mon_enable(1);
+
+	preempt_enable();
+
+	mbps = r_mbps + w_mbps;
+	pr_debug("R/W/BW/us = %d/%d/%d/%d\n", r_mbps, w_mbps, mbps, us);
+	compute_bw(mbps);
+
+	prev_ts = ts;
+	mutex_unlock(&bw_lock);
+}
+
+static void do_bw_sample(struct work_struct *work)
+{
+	measure_bw();
+	queue_delayed_work(bw_sample_wq, &bw_sample,
+				msecs_to_jiffies(sample_ms));
+}
+
+static irqreturn_t mon_intr_handler(int irq, void *dev_id)
+{
+	bool pending;
+	u32 regval;
+
+	regval = get_l2_indirect_reg(L2PMOVSR);
+	pr_debug("Got interrupt: %x\n", regval);
+
+	pending = cancel_delayed_work_sync(&bw_sample);
+
+	/*
+	 * Don't recalc bandwidth if the interrupt came just after the end
+	 * of the sample period (!pending). This is done for two reasons:
+	 *
+	 * 1. Sampling the BW during a very short duration can result in a
+	 *    very inaccurate measurement due to very short bursts.
+	 * 2. If the limit was hit very close to the sample period, then the
+	 *    current BW estimate is not very off and can stay as such.
+	 */
+	if (pending)
+		measure_bw();
+
+	queue_delayed_work(bw_sample_wq, &bw_sample,
+				msecs_to_jiffies(sample_ms));
+
+	return IRQ_HANDLED;
+}
+
+static int set_enable(const char *arg, const struct kernel_param *kp)
+{
+	int ret;
+	bool old_val = *((bool *) kp->arg);
+	bool new_val;
+
+	if (!arg)
+		arg = "1";
+	ret = strtobool(arg, &new_val);
+	if (ret)
+		return ret;
+
+	if (!old_val && new_val) {
+		ret = start_monitoring();
+		if (ret) {
+			pr_err("L2PM counters already in use.\n");
+			return ret;
+		}
+		pr_info("Enabling CPU BW monitoring\n");
+	} else if (old_val && !new_val) {
+		pr_info("Disabling CPU BW monitoring\n");
+		stop_monitoring();
+	}
+
+	*(bool *) kp->arg = new_val;
+	return 0;
+}
+
+static struct kernel_param_ops enable_ops = {
+	.set = set_enable,
+	.get = param_get_bool,
+};
+
+static int cpubw_krait_init(void)
+{
+	bw_sample_wq = alloc_workqueue("cpubw-krait", WQ_HIGHPRI, 0);
+	if (!bw_sample_wq)
+		return -ENOMEM;
+
+	bus_client = msm_bus_scale_register_client(&bw_data);
+	if (!bus_client) {
+		pr_err("Unable to register bus client\n");
+		destroy_workqueue(bw_sample_wq);
+		return -ENODEV;
+	}
+
+	return 0;
+}
+late_initcall(cpubw_krait_init);
+
+MODULE_DESCRIPTION("CPU DDR bandwidth voting driver for Krait CPUs");
+MODULE_LICENSE("GPL v2");
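
The new cpubw-krait driver samples the two L2 PM counters (read and write
beats) every sample_ms, converts the counts into MBps, pads the result with
guard_band_mbps plus a decay filter in compute_bw(), and votes the outcome on
the CPU-to-DDR bus paths.  The MBps conversion in mon_get_mbps() reduces to the
arithmetic below (standalone sketch; bytes_per_beat defaults to 8):

	/* Standalone sketch of the mon_get_mbps() arithmetic: beats counted
	 * in a window of 'us' microseconds -> whole MBps (MBYTE = 1 << 20).
	 */
	static unsigned long long beats_to_mbps(unsigned long long beats,
						unsigned int bytes_per_beat,
						unsigned int us)
	{
		unsigned long long bytes_per_sec;

		bytes_per_sec = beats * bytes_per_beat * 1000000ULL / us;
		return (bytes_per_sec + (1ULL << 20) - 1) >> 20; /* round up */
	}

	/* e.g. 6553600 beats of 8 bytes in a 50 ms window:
	 *   6553600 * 8 / 0.05 s = 1048576000 B/s = 1000 MBps */

The driver is off by default; writing to its 'enable' module parameter requests
the shared L2 PM interrupt and starts the periodic sampling work.
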
diff --git a/arch/arm/mach-msm/msm_bus/msm_bus_bimc.c b/arch/arm/mach-msm/msm_bus/msm_bus_bimc.c
index 8b64653..c745f92 100644
--- a/arch/arm/mach-msm/msm_bus/msm_bus_bimc.c
+++ b/arch/arm/mach-msm/msm_bus/msm_bus_bimc.c
@@ -1578,15 +1578,16 @@
 
 static void set_qos_bw_regs(void __iomem *baddr, uint32_t mas_index,
 	int32_t th, int32_t tm, int32_t tl, uint32_t gp,
-	uint32_t gc, bool bke_en)
+	uint32_t gc)
 {
 	int32_t reg_val, val;
+	int32_t bke_reg_val;
 	int16_t val2;
 
 	/* Disable BKE before writing to registers as per spec */
-	reg_val = readl_relaxed(M_BKE_EN_ADDR(baddr, mas_index)) &
+	bke_reg_val = readl_relaxed(M_BKE_EN_ADDR(baddr, mas_index)) &
 		M_BKE_EN_RMSK;
-	writel_relaxed((reg_val & ~(M_BKE_EN_EN_BMSK)),
+	writel_relaxed((bke_reg_val & ~(M_BKE_EN_EN_BMSK)),
 		M_BKE_EN_ADDR(baddr, mas_index));
 
 	/* Write values of registers calculated */
@@ -1624,8 +1625,7 @@
 	/* Set BKE enable to the value it was */
 	reg_val = readl_relaxed(M_BKE_EN_ADDR(baddr, mas_index)) &
 		M_BKE_EN_RMSK;
-	val =  bke_en << M_BKE_EN_EN_SHFT;
-	writel_relaxed(((reg_val & ~(M_BKE_EN_EN_BMSK)) | (val &
+	writel_relaxed(((reg_val & ~(M_BKE_EN_EN_BMSK)) | (bke_reg_val &
 		M_BKE_EN_EN_BMSK)), M_BKE_EN_ADDR(baddr, mas_index));
 	/* Ensure that all bandwidth register writes have completed
 	 * before returning
@@ -1651,7 +1651,7 @@
 	/* Only calculate if there's a requested bandwidth and window */
 	if (qbw->bw && qbw->ws) {
 		int64_t th, tm, tl;
-		uint32_t gp, gc, data_width;
+		uint32_t gp, gc;
 		int64_t gp_nominal, gp_required, gp_calc, data, temp;
 		int64_t win = qbw->ws * binfo->qos_freq;
 		temp = win;
@@ -1666,16 +1666,7 @@
 		 * Calculate max window size, defined by bw request.
 		 * Units: (KHz, MB/s)
 		 */
-		data_width = (readl_relaxed(M_CONFIG_INFO_2_ADDR(
-			binfo->base, mas_index)) &
-			M_CONFIG_INFO_2_M_DATA_WIDTH_BMSK) >>
-			M_CONFIG_INFO_2_M_DATA_WIDTH_SHFT;
-
-		/* If unspecified, use data-width 8 by default */
-		if (!data_width)
-			data_width = 8;
-
-		gp_calc = MAX_GC * data_width * binfo->qos_freq * 1000;
+		gp_calc = MAX_GC * binfo->qos_freq * 1000;
 		gp_required = gp_calc;
 		bimc_div(&gp_required, qbw->bw);
 
@@ -1684,7 +1675,7 @@
 
 		/* Calculate bandwith in grants and ceil. */
 		temp = qbw->bw * gp;
-		data = data_width * binfo->qos_freq * 1000;
+		data = binfo->qos_freq * 1000;
 		bimc_div(&temp, data);
 		gc = min_t(int64_t, MAX_GC, temp);
 
@@ -1704,12 +1695,10 @@
 			mas_index, th, tm);
 		MSM_BUS_DBG("BIMC: tl: %llu gp:%u gc: %u bke_en: %u\n",
 			tl, gp, gc, bke_en);
-		set_qos_bw_regs(binfo->base, mas_index, th, tm, tl, gp,
-			gc, bke_en);
+		set_qos_bw_regs(binfo->base, mas_index, th, tm, tl, gp, gc);
 	} else
 		/* Clear bandwidth registers */
-		set_qos_bw_regs(binfo->base, mas_index, 0, 0, 0, 0, 0,
-			bke_en);
+		set_qos_bw_regs(binfo->base, mas_index, 0, 0, 0, 0, 0);
 }
 
 static int msm_bus_bimc_allocate_commit_data(struct msm_bus_fabric_registration
@@ -1816,16 +1805,27 @@
 	kfree(cd);
 }
 
-static void bke_switch(void __iomem *baddr, uint32_t mas_index, bool req)
+static void bke_switch(
+	void __iomem *baddr, uint32_t mas_index, bool req, int mode)
 {
 	uint32_t reg_val, val;
 
 	val = req << M_BKE_EN_EN_SHFT;
 	reg_val = readl_relaxed(M_BKE_EN_ADDR(baddr, mas_index)) &
 		M_BKE_EN_RMSK;
+	if (val == reg_val)
+		return;
+
+	if (!req && mode == BIMC_QOS_MODE_FIXED)
+		set_qos_mode(baddr, mas_index, 1, 1, 1);
+
 	writel_relaxed(((reg_val & ~(M_BKE_EN_EN_BMSK)) | (val &
 		M_BKE_EN_EN_BMSK)), M_BKE_EN_ADDR(baddr, mas_index));
+	/* Make sure BKE on/off goes through before changing priorities */
 	wmb();
+
+	if (req)
+		set_qos_mode(baddr, mas_index, 0, 0, 0);
 }
 
 static void msm_bus_bimc_config_master(
@@ -1854,13 +1854,13 @@
 	case BIMC_QOS_MODE_FIXED:
 		for (i = 0; i < ports; i++)
 			bke_switch(binfo->base, info->node_info->qport[i],
-				BKE_OFF);
+				BKE_OFF, mode);
 		break;
 	case BIMC_QOS_MODE_REGULATOR:
 	case BIMC_QOS_MODE_LIMITER:
 		for (i = 0; i < ports; i++)
 			bke_switch(binfo->base, info->node_info->qport[i],
-				BKE_ON);
+				BKE_ON, mode);
 		break;
 	default:
 		break;
@@ -1969,8 +1969,8 @@
 static void bimc_set_static_qos_bw(struct msm_bus_bimc_info *binfo,
 	int mport, struct msm_bus_bimc_qos_bw *qbw)
 {
-	int32_t bw_MBps, thh = 0, thm, thl, gc;
-	int16_t gp;
+	int32_t bw_mbps, thh = 0, thm, thl, gc;
+	int32_t gp;
 	u64 temp;
 
 	if (binfo->qos_freq == 0) {
@@ -1986,17 +1986,17 @@
 	/* Convert bandwidth to MBPS */
 	temp = qbw->bw;
 	bimc_div(&temp, 1000000);
-	bw_MBps = temp;
+	bw_mbps = temp;
 
 	/* Grant period in clock cycles
 	 * Grant period from bandwidth structure
-	 * is in micro seconds, QoS freq is in KHz.
+	 * is in nano seconds, QoS freq is in KHz.
 	 * Divide by 1000 to get clock cycles */
-	gp = (binfo->qos_freq * qbw->gp) / 1000;
+	gp = (binfo->qos_freq * qbw->gp) / (1000 * NSEC_PER_USEC);
 
 	/* Grant count = BW in MBps * Grant period
 	 * in micro seconds */
-	gc = bw_MBps * qbw->gp;
+	gc = bw_mbps * (qbw->gp / NSEC_PER_USEC);
 
 	/* Medium threshold = -((Medium Threshold percentage *
 	 * Grant count) / 100) */
@@ -2007,8 +2007,10 @@
 	thl = -gc;
 	qbw->thl = thl;
 
-	set_qos_bw_regs(binfo->base, mport, thh, thm, thl, gp,
-		gc, 1);
+	MSM_BUS_DBG("%s: BKE parameters: gp %d, gc %d, thm %d thl %d thh %d",
+			__func__, gp, gc, thm, thl, thh);
+
+	set_qos_bw_regs(binfo->base, mport, thh, thm, thl, gp, gc);
 }
 
 static void bimc_init_mas_reg(struct msm_bus_bimc_info *binfo,
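
With qcom,bimc,gp now expressed in nano-seconds (and the device-tree values
scaled to match, e.g. 5 -> 5000 in msm8610-bus.dtsi), bimc_set_static_qos_bw()
converts the grant period into QoS clock cycles and derives the grant count as
the requested MBps multiplied by the grant period in micro-seconds.  A sketch
of the cycle conversion, with an illustrative QoS clock frequency:

	/* Sketch of the conversion above: binfo->qos_freq is in KHz and
	 * qbw->gp is now in ns, so
	 *   cycles = (KHz * 1000) * (ns * 1e-9) = KHz * ns / 1e6
	 */
	static unsigned int gp_to_cycles(unsigned int qos_freq_khz,
					 unsigned int gp_ns)
	{
		return (qos_freq_khz * gp_ns) / (1000 * 1000);
	}

	/* e.g. gp_to_cycles(100000, 5000) == 500 cycles for a 5000 ns grant
	 * period on an assumed 100 MHz QoS clock. */
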
diff --git a/drivers/cpufreq/cpufreq_interactive.c b/drivers/cpufreq/cpufreq_interactive.c
index 7d1952c..eb9cd2e 100644
--- a/drivers/cpufreq/cpufreq_interactive.c
+++ b/drivers/cpufreq/cpufreq_interactive.c
@@ -29,6 +29,7 @@
 #include <linux/workqueue.h>
 #include <linux/kthread.h>
 #include <linux/slab.h>
+#include <linux/kernel_stat.h>
 #include <asm/cputime.h>
 
 #define CREATE_TRACE_POINTS
@@ -93,7 +94,11 @@
  * timer interval.
  */
 #define DEFAULT_ABOVE_HISPEED_DELAY DEFAULT_TIMER_RATE
-static unsigned long above_hispeed_delay_val = DEFAULT_ABOVE_HISPEED_DELAY;
+static unsigned int default_above_hispeed_delay[] = {
+	DEFAULT_ABOVE_HISPEED_DELAY };
+static spinlock_t above_hispeed_delay_lock;
+static unsigned int *above_hispeed_delay = default_above_hispeed_delay;
+static int nabove_hispeed_delay = ARRAY_SIZE(default_above_hispeed_delay);
 
 /* Non-zero means indefinite speed boost active */
 static int boost_val;
@@ -109,6 +114,8 @@
 #define DEFAULT_TIMER_SLACK (4 * DEFAULT_TIMER_RATE)
 static int timer_slack_val = DEFAULT_TIMER_SLACK;
 
+static bool io_is_busy;
+
 static int cpufreq_governor_interactive(struct cpufreq_policy *policy,
 		unsigned int event);
 
@@ -122,27 +129,108 @@
 	.owner = THIS_MODULE,
 };
 
+static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu,
+						  cputime64_t *wall)
+{
+	u64 idle_time;
+	u64 cur_wall_time;
+	u64 busy_time;
+
+	cur_wall_time = jiffies64_to_cputime64(get_jiffies_64());
+
+	busy_time  = kcpustat_cpu(cpu).cpustat[CPUTIME_USER];
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SYSTEM];
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_IRQ];
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_SOFTIRQ];
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_STEAL];
+	busy_time += kcpustat_cpu(cpu).cpustat[CPUTIME_NICE];
+
+	idle_time = cur_wall_time - busy_time;
+	if (wall)
+		*wall = jiffies_to_usecs(cur_wall_time);
+
+	return jiffies_to_usecs(idle_time);
+}
+
+static inline cputime64_t get_cpu_idle_time(unsigned int cpu,
+					    cputime64_t *wall)
+{
+	u64 idle_time = get_cpu_idle_time_us(cpu, wall);
+
+	if (idle_time == -1ULL)
+		idle_time = get_cpu_idle_time_jiffy(cpu, wall);
+	else if (!io_is_busy)
+		idle_time += get_cpu_iowait_time_us(cpu, wall);
+
+	return idle_time;
+}
+
 static void cpufreq_interactive_timer_resched(
 	struct cpufreq_interactive_cpuinfo *pcpu)
 {
-	unsigned long expires = jiffies + usecs_to_jiffies(timer_rate);
+	unsigned long expires;
 	unsigned long flags;
 
+	spin_lock_irqsave(&pcpu->load_lock, flags);
+	pcpu->time_in_idle =
+		get_cpu_idle_time(smp_processor_id(),
+				     &pcpu->time_in_idle_timestamp);
+	pcpu->cputime_speedadj = 0;
+	pcpu->cputime_speedadj_timestamp = pcpu->time_in_idle_timestamp;
+	expires = jiffies + usecs_to_jiffies(timer_rate);
 	mod_timer_pinned(&pcpu->cpu_timer, expires);
+
 	if (timer_slack_val >= 0 && pcpu->target_freq > pcpu->policy->min) {
 		expires += usecs_to_jiffies(timer_slack_val);
 		mod_timer_pinned(&pcpu->cpu_slack_timer, expires);
 	}
 
+	spin_unlock_irqrestore(&pcpu->load_lock, flags);
+}
+
+/* The caller shall take enable_sem write semaphore to avoid any timer race.
+ * The cpu_timer and cpu_slack_timer must be deactivated when calling this
+ * function.
+ */
+static void cpufreq_interactive_timer_start(int cpu)
+{
+	struct cpufreq_interactive_cpuinfo *pcpu = &per_cpu(cpuinfo, cpu);
+	unsigned long expires = jiffies + usecs_to_jiffies(timer_rate);
+	unsigned long flags;
+
+	pcpu->cpu_timer.expires = expires;
+	add_timer_on(&pcpu->cpu_timer, cpu);
+	if (timer_slack_val >= 0 && pcpu->target_freq > pcpu->policy->min) {
+		expires += usecs_to_jiffies(timer_slack_val);
+		pcpu->cpu_slack_timer.expires = expires;
+		add_timer_on(&pcpu->cpu_slack_timer, cpu);
+	}
+
 	spin_lock_irqsave(&pcpu->load_lock, flags);
 	pcpu->time_in_idle =
-		get_cpu_idle_time_us(smp_processor_id(),
-				     &pcpu->time_in_idle_timestamp);
+		get_cpu_idle_time(cpu, &pcpu->time_in_idle_timestamp);
 	pcpu->cputime_speedadj = 0;
 	pcpu->cputime_speedadj_timestamp = pcpu->time_in_idle_timestamp;
 	spin_unlock_irqrestore(&pcpu->load_lock, flags);
 }
 
+static unsigned int freq_to_above_hispeed_delay(unsigned int freq)
+{
+	int i;
+	unsigned int ret;
+	unsigned long flags;
+
+	spin_lock_irqsave(&above_hispeed_delay_lock, flags);
+
+	for (i = 0; i < nabove_hispeed_delay - 1 &&
+			freq >= above_hispeed_delay[i+1]; i += 2)
+		;
+
+	ret = above_hispeed_delay[i];
+	spin_unlock_irqrestore(&above_hispeed_delay_lock, flags);
+	return ret;
+}
+
 static unsigned int freq_to_targetload(unsigned int freq)
 {
 	int i;
@@ -185,9 +273,10 @@
 		 * than or equal to the target load.
 		 */
 
-		cpufreq_frequency_table_target(
-			pcpu->policy, pcpu->freq_table, loadadjfreq / tl,
-			CPUFREQ_RELATION_L, &index);
+		if (cpufreq_frequency_table_target(
+			    pcpu->policy, pcpu->freq_table, loadadjfreq / tl,
+			    CPUFREQ_RELATION_L, &index))
+			break;
 		freq = pcpu->freq_table[index].frequency;
 
 		if (freq > prevfreq) {
@@ -199,10 +288,11 @@
 				 * Find the highest frequency that is less
 				 * than freqmax.
 				 */
-				cpufreq_frequency_table_target(
-					pcpu->policy, pcpu->freq_table,
-					freqmax - 1, CPUFREQ_RELATION_H,
-					&index);
+				if (cpufreq_frequency_table_target(
+					    pcpu->policy, pcpu->freq_table,
+					    freqmax - 1, CPUFREQ_RELATION_H,
+					    &index))
+					break;
 				freq = pcpu->freq_table[index].frequency;
 
 				if (freq == freqmin) {
@@ -225,10 +315,11 @@
 				 * Find the lowest frequency that is higher
 				 * than freqmin.
 				 */
-				cpufreq_frequency_table_target(
-					pcpu->policy, pcpu->freq_table,
-					freqmin + 1, CPUFREQ_RELATION_L,
-					&index);
+				if (cpufreq_frequency_table_target(
+					    pcpu->policy, pcpu->freq_table,
+					    freqmin + 1, CPUFREQ_RELATION_L,
+					    &index))
+					break;
 				freq = pcpu->freq_table[index].frequency;
 
 				/*
@@ -256,10 +347,15 @@
 	unsigned int delta_time;
 	u64 active_time;
 
-	now_idle = get_cpu_idle_time_us(cpu, &now);
+	now_idle = get_cpu_idle_time(cpu, &now);
 	delta_idle = (unsigned int)(now_idle - pcpu->time_in_idle);
 	delta_time = (unsigned int)(now - pcpu->time_in_idle_timestamp);
-	active_time = delta_time - delta_idle;
+
+	if (delta_time <= delta_idle)
+		active_time = 0;
+	else
+		active_time = delta_time - delta_idle;
+
 	pcpu->cputime_speedadj += active_time * pcpu->policy->cur;
 
 	pcpu->time_in_idle = now_idle;
@@ -315,7 +411,8 @@
 
 	if (pcpu->target_freq >= hispeed_freq &&
 	    new_freq > pcpu->target_freq &&
-	    now - pcpu->hispeed_validate_time < above_hispeed_delay_val) {
+	    now - pcpu->hispeed_validate_time <
+	    freq_to_above_hispeed_delay(pcpu->target_freq)) {
 		trace_cpufreq_interactive_notyet(
 			data, cpu_load, pcpu->target_freq,
 			pcpu->policy->cur, new_freq);
@@ -326,11 +423,8 @@
 
 	if (cpufreq_frequency_table_target(pcpu->policy, pcpu->freq_table,
 					   new_freq, CPUFREQ_RELATION_L,
-					   &index)) {
-		pr_warn_once("timer %d: cpufreq_frequency_table_target error\n",
-			     (int) data);
+					   &index))
 		goto rearm;
-	}
 
 	new_freq = pcpu->freq_table[index].frequency;
 
@@ -565,9 +659,19 @@
 		for_each_cpu(cpu, pcpu->policy->cpus) {
 			struct cpufreq_interactive_cpuinfo *pjcpu =
 				&per_cpu(cpuinfo, cpu);
+			if (cpu != freq->cpu) {
+				if (!down_read_trylock(&pjcpu->enable_sem))
+					continue;
+				if (!pjcpu->governor_enabled) {
+					up_read(&pjcpu->enable_sem);
+					continue;
+				}
+			}
 			spin_lock_irqsave(&pjcpu->load_lock, flags);
 			update_load(cpu);
 			spin_unlock_irqrestore(&pjcpu->load_lock, flags);
+			if (cpu != freq->cpu)
+				up_read(&pjcpu->enable_sem);
 		}
 
 		up_read(&pcpu->enable_sem);
@@ -579,6 +683,51 @@
 	.notifier_call = cpufreq_interactive_notifier,
 };
 
+static unsigned int *get_tokenized_data(const char *buf, int *num_tokens)
+{
+	const char *cp;
+	int i;
+	int ntokens = 1;
+	unsigned int *tokenized_data;
+	int err = -EINVAL;
+
+	cp = buf;
+	while ((cp = strpbrk(cp + 1, " :")))
+		ntokens++;
+
+	if (!(ntokens & 0x1))
+		goto err;
+
+	tokenized_data = kmalloc(ntokens * sizeof(unsigned int), GFP_KERNEL);
+	if (!tokenized_data) {
+		err = -ENOMEM;
+		goto err;
+	}
+
+	cp = buf;
+	i = 0;
+	while (i < ntokens) {
+		if (sscanf(cp, "%u", &tokenized_data[i++]) != 1)
+			goto err_kfree;
+
+		cp = strpbrk(cp, " :");
+		if (!cp)
+			break;
+		cp++;
+	}
+
+	if (i != ntokens)
+		goto err_kfree;
+
+	*num_tokens = ntokens;
+	return tokenized_data;
+
+err_kfree:
+	kfree(tokenized_data);
+err:
+	return ERR_PTR(err);
+}
+
 static ssize_t show_target_loads(
 	struct kobject *kobj, struct attribute *attr, char *buf)
 {
@@ -592,7 +741,7 @@
 		ret += sprintf(buf + ret, "%u%s", target_loads[i],
 			       i & 0x1 ? ":" : " ");
 
-	ret += sprintf(buf + ret, "\n");
+	ret += sprintf(buf + --ret, "\n");
 	spin_unlock_irqrestore(&target_loads_lock, flags);
 	return ret;
 }
@@ -601,40 +750,13 @@
 	struct kobject *kobj, struct attribute *attr, const char *buf,
 	size_t count)
 {
-	int ret;
-	const char *cp;
+	int ntokens;
 	unsigned int *new_target_loads = NULL;
-	int ntokens = 1;
-	int i;
 	unsigned long flags;
 
-	cp = buf;
-	while ((cp = strpbrk(cp + 1, " :")))
-		ntokens++;
-
-	if (!(ntokens & 0x1))
-		goto err_inval;
-
-	new_target_loads = kmalloc(ntokens * sizeof(unsigned int), GFP_KERNEL);
-	if (!new_target_loads) {
-		ret = -ENOMEM;
-		goto err;
-	}
-
-	cp = buf;
-	i = 0;
-	while (i < ntokens) {
-		if (sscanf(cp, "%u", &new_target_loads[i++]) != 1)
-			goto err_inval;
-
-		cp = strpbrk(cp, " :");
-		if (!cp)
-			break;
-		cp++;
-	}
-
-	if (i != ntokens)
-		goto err_inval;
+	new_target_loads = get_tokenized_data(buf, &ntokens);
+	if (IS_ERR(new_target_loads))
+		return PTR_RET(new_target_loads);
 
 	spin_lock_irqsave(&target_loads_lock, flags);
 	if (target_loads != default_target_loads)
@@ -643,18 +765,56 @@
 	ntarget_loads = ntokens;
 	spin_unlock_irqrestore(&target_loads_lock, flags);
 	return count;
-
-err_inval:
-	ret = -EINVAL;
-err:
-	kfree(new_target_loads);
-	return ret;
 }
 
 static struct global_attr target_loads_attr =
 	__ATTR(target_loads, S_IRUGO | S_IWUSR,
 		show_target_loads, store_target_loads);
 
+static ssize_t show_above_hispeed_delay(
+	struct kobject *kobj, struct attribute *attr, char *buf)
+{
+	int i;
+	ssize_t ret = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&above_hispeed_delay_lock, flags);
+
+	for (i = 0; i < nabove_hispeed_delay; i++)
+		ret += sprintf(buf + ret, "%u%s", above_hispeed_delay[i],
+			       i & 0x1 ? ":" : " ");
+
+	ret += sprintf(buf + --ret, "\n");
+	spin_unlock_irqrestore(&above_hispeed_delay_lock, flags);
+	return ret;
+}
+
+static ssize_t store_above_hispeed_delay(
+	struct kobject *kobj, struct attribute *attr, const char *buf,
+	size_t count)
+{
+	int ntokens;
+	unsigned int *new_above_hispeed_delay = NULL;
+	unsigned long flags;
+
+	new_above_hispeed_delay = get_tokenized_data(buf, &ntokens);
+	if (IS_ERR(new_above_hispeed_delay))
+		return PTR_RET(new_above_hispeed_delay);
+
+	spin_lock_irqsave(&above_hispeed_delay_lock, flags);
+	if (above_hispeed_delay != default_above_hispeed_delay)
+		kfree(above_hispeed_delay);
+	above_hispeed_delay = new_above_hispeed_delay;
+	nabove_hispeed_delay = ntokens;
+	spin_unlock_irqrestore(&above_hispeed_delay_lock, flags);
+	return count;
+
+}
+
+static struct global_attr above_hispeed_delay_attr =
+	__ATTR(above_hispeed_delay, S_IRUGO | S_IWUSR,
+		show_above_hispeed_delay, store_above_hispeed_delay);
+
 static ssize_t show_hispeed_freq(struct kobject *kobj,
 				 struct attribute *attr, char *buf)
 {
@@ -723,28 +883,6 @@
 static struct global_attr min_sample_time_attr = __ATTR(min_sample_time, 0644,
 		show_min_sample_time, store_min_sample_time);
 
-static ssize_t show_above_hispeed_delay(struct kobject *kobj,
-					struct attribute *attr, char *buf)
-{
-	return sprintf(buf, "%lu\n", above_hispeed_delay_val);
-}
-
-static ssize_t store_above_hispeed_delay(struct kobject *kobj,
-					 struct attribute *attr,
-					 const char *buf, size_t count)
-{
-	int ret;
-	unsigned long val;
-
-	ret = strict_strtoul(buf, 0, &val);
-	if (ret < 0)
-		return ret;
-	above_hispeed_delay_val = val;
-	return count;
-}
-
-define_one_global_rw(above_hispeed_delay);
-
 static ssize_t show_timer_rate(struct kobject *kobj,
 			struct attribute *attr, char *buf)
 {
@@ -862,17 +1000,40 @@
 
 define_one_global_rw(boostpulse_duration);
 
+static ssize_t show_io_is_busy(struct kobject *kobj,
+			struct attribute *attr, char *buf)
+{
+	return sprintf(buf, "%u\n", io_is_busy);
+}
+
+static ssize_t store_io_is_busy(struct kobject *kobj,
+			struct attribute *attr, const char *buf, size_t count)
+{
+	int ret;
+	unsigned long val;
+
+	ret = kstrtoul(buf, 0, &val);
+	if (ret < 0)
+		return ret;
+	io_is_busy = val;
+	return count;
+}
+
+static struct global_attr io_is_busy_attr = __ATTR(io_is_busy, 0644,
+		show_io_is_busy, store_io_is_busy);
+
 static struct attribute *interactive_attributes[] = {
 	&target_loads_attr.attr,
+	&above_hispeed_delay_attr.attr,
 	&hispeed_freq_attr.attr,
 	&go_hispeed_load_attr.attr,
-	&above_hispeed_delay.attr,
 	&min_sample_time_attr.attr,
 	&timer_rate_attr.attr,
 	&timer_slack.attr,
 	&boost.attr,
 	&boostpulse.attr,
 	&boostpulse_duration.attr,
+	&io_is_busy_attr.attr,
 	NULL,
 };
 
@@ -922,8 +1083,6 @@
 			hispeed_freq = policy->max;
 
 		for_each_cpu(j, policy->cpus) {
-			unsigned long expires;
-
 			pcpu = &per_cpu(cpuinfo, j);
 			pcpu->policy = policy;
 			pcpu->target_freq = policy->cur;
@@ -934,14 +1093,7 @@
 			pcpu->hispeed_validate_time =
 				pcpu->floor_validate_time;
 			down_write(&pcpu->enable_sem);
-			expires = jiffies + usecs_to_jiffies(timer_rate);
-			pcpu->cpu_timer.expires = expires;
-			add_timer_on(&pcpu->cpu_timer, j);
-			if (timer_slack_val >= 0) {
-				expires += usecs_to_jiffies(timer_slack_val);
-				pcpu->cpu_slack_timer.expires = expires;
-				add_timer_on(&pcpu->cpu_slack_timer, j);
-			}
+			cpufreq_interactive_timer_start(j);
 			pcpu->governor_enabled = 1;
 			up_write(&pcpu->enable_sem);
 		}
@@ -1000,6 +1152,33 @@
 		else if (policy->min > policy->cur)
 			__cpufreq_driver_target(policy,
 					policy->min, CPUFREQ_RELATION_L);
+		for_each_cpu(j, policy->cpus) {
+			pcpu = &per_cpu(cpuinfo, j);
+
+			/* hold write semaphore to avoid race */
+			down_write(&pcpu->enable_sem);
+			if (pcpu->governor_enabled == 0) {
+				up_write(&pcpu->enable_sem);
+				continue;
+			}
+
+			/* update target_freq firstly */
+			if (policy->max < pcpu->target_freq)
+				pcpu->target_freq = policy->max;
+			else if (policy->min > pcpu->target_freq)
+				pcpu->target_freq = policy->min;
+
+			/* Reschedule timer.
+			 * Delete the timers, else the timer callback may
+			 * return without re-arm the timer when failed
+			 * acquire the semaphore. This race may cause timer
+			 * stopped unexpectedly.
+			 */
+			del_timer_sync(&pcpu->cpu_timer);
+			del_timer_sync(&pcpu->cpu_slack_timer);
+			cpufreq_interactive_timer_start(j);
+			up_write(&pcpu->enable_sem);
+		}
 		break;
 	}
 	return 0;
@@ -1029,6 +1208,7 @@
 
 	spin_lock_init(&target_loads_lock);
 	spin_lock_init(&speedchange_cpumask_lock);
+	spin_lock_init(&above_hispeed_delay_lock);
 	mutex_init(&gov_lock);
 	speedchange_task =
 		kthread_create(cpufreq_interactive_speedchange_task, NULL,
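
In the governor changes above, target_loads and above_hispeed_delay now share
the get_tokenized_data() parser, which accepts a single value optionally
followed by freq:value pairs (an odd token count), and the new io_is_busy
tunable decides whether iowait time counts as idle or busy when update_load()
computes the per-window load.  The effect of io_is_busy on the computed load,
as a standalone sketch with illustrative numbers:

	/* Sketch of the window load calculation in update_load(): active time
	 * is the wall-clock delta minus the idle delta for the window.
	 * delta_time_us is assumed non-zero here.
	 */
	static unsigned int window_load_pct(unsigned int delta_time_us,
					    unsigned int delta_idle_us)
	{
		unsigned int active = delta_time_us <= delta_idle_us ?
				      0 : delta_time_us - delta_idle_us;

		return (100 * active) / delta_time_us;
	}

	/* 20 ms window with 12 ms idle and 5 ms iowait:
	 *   io_is_busy = 0 -> iowait counts as idle -> (20-17)/20 = 15% load
	 *   io_is_busy = 1 -> iowait counts as busy -> (20-12)/20 = 40% load */
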
diff --git a/drivers/gpu/ion/Makefile b/drivers/gpu/ion/Makefile
index 0e460c8..108abe6 100644
--- a/drivers/gpu/ion/Makefile
+++ b/drivers/gpu/ion/Makefile
@@ -2,4 +2,4 @@
 			ion_carveout_heap.o ion_chunk_heap.o
 obj-$(CONFIG_CMA) += ion_cma_heap.o ion_cma_secure_heap.o
 obj-$(CONFIG_ION_TEGRA) += tegra/
-obj-$(CONFIG_ION_MSM) += ion_iommu_heap.o ion_cp_heap.o ion_removed_heap.o msm/
+obj-$(CONFIG_ION_MSM) += ion_cp_heap.o ion_removed_heap.o msm/
diff --git a/drivers/gpu/ion/ion.c b/drivers/gpu/ion/ion.c
index 6777dae..c791c49 100644
--- a/drivers/gpu/ion/ion.c
+++ b/drivers/gpu/ion/ion.c
@@ -24,14 +24,13 @@
 #include <linux/ion.h>
 #include <linux/kthread.h>
 #include <linux/list.h>
+#include <linux/list_sort.h>
 #include <linux/memblock.h>
 #include <linux/miscdevice.h>
 #include <linux/export.h>
 #include <linux/mm.h>
 #include <linux/mm_types.h>
 #include <linux/rbtree.h>
-#include <linux/rtmutex.h>
-#include <linux/sched.h>
 #include <linux/slab.h>
 #include <linux/seq_file.h>
 #include <linux/uaccess.h>
@@ -62,6 +61,8 @@
 			      unsigned long arg);
 	struct rb_root clients;
 	struct dentry *debug_root;
+	struct dentry *heaps_debug_root;
+	struct dentry *clients_debug_root;
 };
 
 /**
@@ -147,7 +148,6 @@
 
 static int ion_buffer_alloc_dirty(struct ion_buffer *buffer);
 
-static bool ion_heap_drain_freelist(struct ion_heap *heap);
 /* this function should only be called while dev->lock is held */
 static struct ion_buffer *ion_buffer_create(struct ion_heap *heap,
 				     struct ion_device *dev,
@@ -174,7 +174,7 @@
 		if (!(heap->flags & ION_HEAP_FLAG_DEFER_FREE))
 			goto err2;
 
-		ion_heap_drain_freelist(heap);
+		ion_heap_freelist_drain(heap, 0);
 		ret = heap->ops->allocate(heap, buffer, len, align,
 					  flags);
 		if (ret)
@@ -242,7 +242,7 @@
 		buffer->heap->ops->unsecure_buffer(buffer, 1);
 }
 
-static void _ion_buffer_destroy(struct ion_buffer *buffer)
+void ion_buffer_destroy(struct ion_buffer *buffer)
 {
 	if (WARN_ON(buffer->kmap_cnt > 0))
 		buffer->heap->ops->unmap_kernel(buffer->heap, buffer);
@@ -255,7 +255,7 @@
 	kfree(buffer);
 }
 
-static void ion_buffer_destroy(struct kref *kref)
+static void _ion_buffer_destroy(struct kref *kref)
 {
 	struct ion_buffer *buffer = container_of(kref, struct ion_buffer, ref);
 	struct ion_heap *heap = buffer->heap;
@@ -265,14 +265,10 @@
 	rb_erase(&buffer->node, &dev->buffers);
 	mutex_unlock(&dev->buffer_lock);
 
-	if (heap->flags & ION_HEAP_FLAG_DEFER_FREE) {
-		rt_mutex_lock(&heap->lock);
-		list_add(&buffer->list, &heap->free_list);
-		rt_mutex_unlock(&heap->lock);
-		wake_up(&heap->waitqueue);
-		return;
-	}
-	_ion_buffer_destroy(buffer);
+	if (heap->flags & ION_HEAP_FLAG_DEFER_FREE)
+		ion_heap_freelist_add(heap, buffer);
+	else
+		ion_buffer_destroy(buffer);
 }
 
 static void ion_buffer_get(struct ion_buffer *buffer)
@@ -282,7 +278,7 @@
 
 static int ion_buffer_put(struct ion_buffer *buffer)
 {
-	return kref_put(&buffer->ref, ion_buffer_destroy);
+	return kref_put(&buffer->ref, _ion_buffer_destroy);
 }
 
 static void ion_buffer_add_to_handle(struct ion_buffer *buffer)
@@ -708,6 +704,35 @@
 	.release = single_release,
 };
 
+static bool startswith(const char *string, const char *prefix)
+{
+	size_t l1 = strlen(string);
+	size_t l2 = strlen(prefix);
+	return strncmp(string, prefix, min(l1, l2)) == 0;
+}
+
+static int ion_get_client_serial(const struct rb_root *root,
+					const unsigned char *name)
+{
+	int serial = -1;
+	struct rb_node *node;
+	for (node = rb_first(root); node; node = rb_next(node)) {
+		int n;
+		char *serial_string;
+		struct ion_client *client = rb_entry(node, struct ion_client,
+						node);
+		if (!startswith(client->name, name))
+			continue;
+		serial_string = strrchr(client->name, '-');
+		if (!serial_string)
+			continue;
+		serial_string++;
+		sscanf(serial_string, "%d", &n);
+		serial = max(serial, n);
+	}
+	return serial + 1;
+}
+
 struct ion_client *ion_client_create(struct ion_device *dev,
 				     const char *name)
 {
@@ -717,13 +742,16 @@
 	struct rb_node *parent = NULL;
 	struct ion_client *entry;
 	pid_t pid;
-	unsigned int name_len;
+	int name_len;
+	int client_serial;
 
 	if (!name) {
 		pr_err("%s: Name cannot be null\n", __func__);
 		return ERR_PTR(-EINVAL);
 	}
 	name_len = strnlen(name, 64);
+	/* add some space to accommodate the serial number suffix */
+	name_len = min(64, name_len + 11);
 
 	get_task_struct(current->group_leader);
 	task_lock(current->group_leader);
@@ -754,14 +782,14 @@
 		put_task_struct(current->group_leader);
 		kfree(client);
 		return ERR_PTR(-ENOMEM);
-	} else {
-		strlcpy(client->name, name, name_len+1);
 	}
 
 	client->task = task;
 	client->pid = pid;
 
 	down_write(&dev->lock);
+	client_serial = ion_get_client_serial(&dev->clients, name);
+	snprintf(client->name, name_len, "%s-%d", name, client_serial);
 	p = &dev->clients.rb_node;
 	while (*p) {
 		parent = *p;
@@ -776,9 +804,16 @@
 	rb_insert_color(&client->node, &dev->clients);
 
 
-	client->debug_root = debugfs_create_file(name, 0664,
-						 dev->debug_root, client,
-						 &debug_client_fops);
+	client->debug_root = debugfs_create_file(client->name, 0664,
+						dev->clients_debug_root,
+						client, &debug_client_fops);
+	if (!client->debug_root) {
+		char buf[256], *path;
+		path = dentry_path(dev->clients_debug_root, buf, 256);
+		pr_err("Failed to created client debugfs at %s/%s\n",
+			path, client->name);
+	}
+
 	up_write(&dev->lock);
 
 	return client;
@@ -1397,134 +1432,89 @@
 }
 
 /**
- * Searches through a clients handles to find if the buffer is owned
- * by this client. Used for debug output.
- * @param client pointer to candidate owner of buffer
- * @param buf pointer to buffer that we are trying to find the owner of
- * @return 1 if found, 0 otherwise
- */
-static int ion_debug_find_buffer_owner(const struct ion_client *client,
-				       const struct ion_buffer *buf)
-{
-	struct rb_node *n;
-
-	for (n = rb_first(&client->handles); n; n = rb_next(n)) {
-		const struct ion_handle *handle = rb_entry(n,
-						     const struct ion_handle,
-						     node);
-		if (handle->buffer == buf)
-			return 1;
-	}
-	return 0;
-}
-
-/**
- * Adds mem_map_data pointer to the tree of mem_map
- * Used for debug output.
- * @param mem_map The mem_map tree
- * @param data The new data to add to the tree
- */
-static void ion_debug_mem_map_add(struct rb_root *mem_map,
-				  struct mem_map_data *data)
-{
-	struct rb_node **p = &mem_map->rb_node;
-	struct rb_node *parent = NULL;
-	struct mem_map_data *entry;
-
-	while (*p) {
-		parent = *p;
-		entry = rb_entry(parent, struct mem_map_data, node);
-
-		if (data->addr < entry->addr) {
-			p = &(*p)->rb_left;
-		} else if (data->addr > entry->addr) {
-			p = &(*p)->rb_right;
-		} else {
-			pr_err("%s: mem_map_data already found.", __func__);
-			BUG();
-		}
-	}
-	rb_link_node(&data->node, parent, p);
-	rb_insert_color(&data->node, mem_map);
-}
-
-/**
- * Search for an owner of a buffer by iterating over all ION clients.
- * @param dev ion device containing pointers to all the clients.
- * @param buffer pointer to buffer we are trying to find the owner of.
- * @return name of owner.
- */
-const char *ion_debug_locate_owner(const struct ion_device *dev,
-					 const struct ion_buffer *buffer)
-{
-	struct rb_node *j;
-	const char *client_name = NULL;
-
-	for (j = rb_first(&dev->clients); j && !client_name;
-			  j = rb_next(j)) {
-		struct ion_client *client = rb_entry(j, struct ion_client,
-						     node);
-		if (ion_debug_find_buffer_owner(client, buffer))
-			client_name = client->name;
-	}
-	return client_name;
-}
-
-/**
  * Create a mem_map of the heap.
  * @param s seq_file to log error message to.
  * @param heap The heap to create mem_map for.
  * @param mem_map The mem map to be created.
  */
 void ion_debug_mem_map_create(struct seq_file *s, struct ion_heap *heap,
-			      struct rb_root *mem_map)
+			      struct list_head *mem_map)
 {
 	struct ion_device *dev = heap->dev;
-	struct rb_node *n;
+	struct rb_node *cnode;
 	size_t size;
+	struct ion_client *client;
 
 	if (!heap->ops->phys)
 		return;
 
-	for (n = rb_first(&dev->buffers); n; n = rb_next(n)) {
-		struct ion_buffer *buffer =
-				rb_entry(n, struct ion_buffer, node);
-		if (buffer->heap->id == heap->id) {
-			struct mem_map_data *data =
-					kzalloc(sizeof(*data), GFP_KERNEL);
-			if (!data) {
-				seq_printf(s, "ERROR: out of memory. "
-					   "Part of memory map will not be logged\n");
-				break;
-			}
+	down_read(&dev->lock);
+	for (cnode = rb_first(&dev->clients); cnode; cnode = rb_next(cnode)) {
+		struct rb_node *hnode;
+		client = rb_entry(cnode, struct ion_client, node);
 
-			buffer->heap->ops->phys(buffer->heap, buffer,
-						&(data->addr), &size);
-			data->size = (unsigned long) size;
-			data->addr_end = data->addr + data->size - 1;
-			data->client_name = ion_debug_locate_owner(dev, buffer);
-			ion_debug_mem_map_add(mem_map, data);
+		mutex_lock(&client->lock);
+		for (hnode = rb_first(&client->handles);
+		     hnode;
+		     hnode = rb_next(hnode)) {
+			struct ion_handle *handle = rb_entry(
+				hnode, struct ion_handle, node);
+			if (handle->buffer->heap == heap) {
+				struct mem_map_data *data =
+					kzalloc(sizeof(*data), GFP_KERNEL);
+				if (!data)
+					goto inner_error;
+				heap->ops->phys(heap, handle->buffer,
+							&(data->addr), &size);
+				data->size = (unsigned long) size;
+				data->addr_end = data->addr + data->size - 1;
+				data->client_name = kstrdup(client->name,
+							GFP_KERNEL);
+				if (!data->client_name) {
+					kfree(data);
+					goto inner_error;
+				}
+				list_add(&data->node, mem_map);
+			}
 		}
+		mutex_unlock(&client->lock);
 	}
+	up_read(&dev->lock);
+	return;
+
+inner_error:
+	seq_puts(s,
+		"ERROR: out of memory. Part of memory map will not be logged\n");
+	mutex_unlock(&client->lock);
+	up_read(&dev->lock);
 }
 
 /**
  * Free the memory allocated by ion_debug_mem_map_create
  * @param mem_map The mem map to free.
  */
-static void ion_debug_mem_map_destroy(struct rb_root *mem_map)
+static void ion_debug_mem_map_destroy(struct list_head *mem_map)
 {
 	if (mem_map) {
-		struct rb_node *n;
-		while ((n = rb_first(mem_map)) != 0) {
-			struct mem_map_data *data =
-					rb_entry(n, struct mem_map_data, node);
-			rb_erase(&data->node, mem_map);
+		struct mem_map_data *data, *tmp;
+		list_for_each_entry_safe(data, tmp, mem_map, node) {
+			list_del(&data->node);
+			kfree(data->client_name);
 			kfree(data);
 		}
 	}
 }
 
+static int mem_map_cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+	struct mem_map_data *d1, *d2;
+	d1 = list_entry(a, struct mem_map_data, node);
+	d2 = list_entry(b, struct mem_map_data, node);
+	if (d1->addr == d2->addr)
+		return d1->size - d2->size;
+	return d1->addr - d2->addr;
+}
+
 /**
  * Print heap debug information.
  * @param s seq_file to log message to.
@@ -1533,8 +1523,9 @@
 static void ion_heap_print_debug(struct seq_file *s, struct ion_heap *heap)
 {
 	if (heap->ops->print_debug) {
-		struct rb_root mem_map = RB_ROOT;
+		struct list_head mem_map = LIST_HEAD_INIT(mem_map);
 		ion_debug_mem_map_create(s, heap, &mem_map);
+		list_sort(NULL, &mem_map, mem_map_cmp);
 		heap->ops->print_debug(heap, s, &mem_map);
 		ion_debug_mem_map_destroy(&mem_map);
 	}
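As an aside on the new flow: the mem_map is now an ordinary list that is built under the locks, ordered afterwards with list_sort() using mem_map_cmp() above, printed, and finally torn down by ion_debug_mem_map_destroy(). A minimal, purely illustrative sketch of the list_sort() idiom follows (the demo_* names are hypothetical); note that a three-way compare avoids the int truncation a plain subtraction can suffer when addresses are far apart:

	#include <linux/list.h>
	#include <linux/list_sort.h>

	struct demo_entry {
		unsigned long addr;
		struct list_head node;
	};

	static int demo_cmp(void *priv, struct list_head *a, struct list_head *b)
	{
		struct demo_entry *ea = list_entry(a, struct demo_entry, node);
		struct demo_entry *eb = list_entry(b, struct demo_entry, node);

		if (ea->addr != eb->addr)
			return ea->addr < eb->addr ? -1 : 1;
		return 0;
	}

	/* after populating LIST_HEAD(demo_list) with demo_entry nodes: */
	/* list_sort(NULL, &demo_list, demo_cmp); entries end up in ascending addr order */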
@@ -1551,6 +1542,7 @@
 	seq_printf(s, "%16.s %16.s %16.s\n", "client", "pid", "size");
 	seq_printf(s, "----------------------------------------------------\n");
 
+	down_read(&dev->lock);
 	for (n = rb_first(&dev->clients); n; n = rb_next(n)) {
 		struct ion_client *client = rb_entry(n, struct ion_client,
 						     node);
@@ -1568,6 +1560,7 @@
 				   client->pid, size);
 		}
 	}
+	up_read(&dev->lock);
 	seq_printf(s, "----------------------------------------------------\n");
 	seq_printf(s, "orphaned allocations (info is from last known client):"
 		   "\n");
@@ -1611,87 +1604,89 @@
 	.release = single_release,
 };
 
-static size_t ion_heap_free_list_is_empty(struct ion_heap *heap)
+#ifdef DEBUG_HEAP_SHRINKER
+static int debug_shrink_set(void *data, u64 val)
 {
-	bool is_empty;
+        struct ion_heap *heap = data;
+        struct shrink_control sc;
+        int objs;
 
-	rt_mutex_lock(&heap->lock);
-	is_empty = list_empty(&heap->free_list);
-	rt_mutex_unlock(&heap->lock);
+        sc.gfp_mask = -1;
+        sc.nr_to_scan = 0;
 
-	return is_empty;
+        if (!val)
+                return 0;
+
+        objs = heap->shrinker.shrink(&heap->shrinker, &sc);
+        sc.nr_to_scan = objs;
+
+        heap->shrinker.shrink(&heap->shrinker, &sc);
+        return 0;
 }
 
-static int ion_heap_deferred_free(void *data)
+static int debug_shrink_get(void *data, u64 *val)
 {
-	struct ion_heap *heap = data;
+        struct ion_heap *heap = data;
+        struct shrink_control sc;
+        int objs;
 
-	while (true) {
-		struct ion_buffer *buffer;
+        sc.gfp_mask = -1;
+        sc.nr_to_scan = 0;
 
-		wait_event_freezable(heap->waitqueue,
-				     !ion_heap_free_list_is_empty(heap));
-
-		rt_mutex_lock(&heap->lock);
-		if (list_empty(&heap->free_list)) {
-			rt_mutex_unlock(&heap->lock);
-			continue;
-		}
-		buffer = list_first_entry(&heap->free_list, struct ion_buffer,
-					  list);
-		list_del(&buffer->list);
-		rt_mutex_unlock(&heap->lock);
-		_ion_buffer_destroy(buffer);
-	}
-
-	return 0;
+        objs = heap->shrinker.shrink(&heap->shrinker, &sc);
+        *val = objs;
+        return 0;
 }
 
-static bool ion_heap_drain_freelist(struct ion_heap *heap)
-{
-	struct ion_buffer *buffer, *tmp;
-
-	if (ion_heap_free_list_is_empty(heap))
-		return false;
-	rt_mutex_lock(&heap->lock);
-	list_for_each_entry_safe(buffer, tmp, &heap->free_list, list) {
-		list_del(&buffer->list);
-		_ion_buffer_destroy(buffer);
-	}
-	BUG_ON(!list_empty(&heap->free_list));
-	rt_mutex_unlock(&heap->lock);
-
-
-	return true;
-}
+DEFINE_SIMPLE_ATTRIBUTE(debug_shrink_fops, debug_shrink_get,
+                        debug_shrink_set, "%llu\n");
+#endif
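The two helpers above only exist when DEBUG_HEAP_SHRINKER is defined; together with the <heap name>_shrink file created further down in ion_device_add_heap(), they give a manual handle on a heap's shrinker: reading the file asks the shrinker for a count only (nr_to_scan == 0) and reports how many pages the heap is currently caching, while writing any non-zero value makes it reclaim everything it just reported. For a heap named "system" (name assumed purely for illustration) that would mean reading ion/heaps/system_shrink to see the pooled plus deferred-free pages and writing 1 to it to drain them.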
 
 void ion_device_add_heap(struct ion_device *dev, struct ion_heap *heap)
 {
-	struct sched_param param = { .sched_priority = 0 };
+	struct dentry *debug_file;
 
 	if (!heap->ops->allocate || !heap->ops->free || !heap->ops->map_dma ||
 	    !heap->ops->unmap_dma)
 		pr_err("%s: can not add heap with invalid ops struct.\n",
 		       __func__);
 
-	if (heap->flags & ION_HEAP_FLAG_DEFER_FREE) {
-		INIT_LIST_HEAD(&heap->free_list);
-		rt_mutex_init(&heap->lock);
-		init_waitqueue_head(&heap->waitqueue);
-		heap->task = kthread_run(ion_heap_deferred_free, heap,
-					 "%s", heap->name);
-		sched_setscheduler(heap->task, SCHED_IDLE, &param);
-		if (IS_ERR(heap->task))
-			pr_err("%s: creating thread for deferred free failed\n",
-			       __func__);
-	}
+	if (heap->flags & ION_HEAP_FLAG_DEFER_FREE)
+		ion_heap_init_deferred_free(heap);
 
 	heap->dev = dev;
 	down_write(&dev->lock);
-	plist_node_init(&heap->node, heap->id);
+	/* use negative heap->id to reverse the priority -- when traversing
+	   the list later attempt higher id numbers first */
+	plist_node_init(&heap->node, -heap->id);
 	plist_add(&heap->node, &dev->heaps);
-	debugfs_create_file(heap->name, 0664, dev->debug_root, heap,
-			    &debug_heap_fops);
+	debug_file = debugfs_create_file(heap->name, 0664,
+					dev->heaps_debug_root, heap,
+					&debug_heap_fops);
+
+	if (!debug_file) {
+		char buf[256], *path;
+		path = dentry_path(dev->heaps_debug_root, buf, 256);
+		pr_err("Failed to created heap debugfs at %s/%s\n",
+			path, heap->name);
+	}
+
+#ifdef DEBUG_HEAP_SHRINKER
+	if (heap->shrinker.shrink) {
+		char debug_name[64];
+
+		snprintf(debug_name, 64, "%s_shrink", heap->name);
+		debug_file = debugfs_create_file(
+			debug_name, 0644, dev->heaps_debug_root, heap,
+			&debug_shrink_fops);
+		if (!debug_file) {
+			char buf[256], *path;
+			path = dentry_path(dev->heaps_debug_root, buf, 256);
+			pr_err("Failed to created heap shrinker debugfs at %s/%s\n",
+				path, debug_name);
+		}
+	}
+#endif
 	up_write(&dev->lock);
 }
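To make the negative-id trick above concrete: heaps with ids 8, 25 and 29 are inserted with plist priorities -8, -25 and -29, and because plist iteration runs in ascending priority order, a walk over dev->heaps visits heap 29 first, then 25, then 8 -- higher ids get the first attempt, exactly as the comment intends.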
 
@@ -1839,8 +1834,21 @@
 	}
 
 	idev->debug_root = debugfs_create_dir("ion", NULL);
-	if (IS_ERR_OR_NULL(idev->debug_root))
-		pr_err("ion: failed to create debug files.\n");
+	if (!idev->debug_root) {
+		pr_err("ion: failed to create debugfs root directory.\n");
+		goto debugfs_done;
+	}
+	idev->heaps_debug_root = debugfs_create_dir("heaps", idev->debug_root);
+	if (!idev->heaps_debug_root) {
+		pr_err("ion: failed to create debugfs heaps directory.\n");
+		goto debugfs_done;
+	}
+	idev->clients_debug_root = debugfs_create_dir("clients",
+						idev->debug_root);
+	if (!idev->clients_debug_root)
+		pr_err("ion: failed to create debugfs clients directory.\n");
+
+debugfs_done:
 
 	idev->custom_ioctl = custom_ioctl;
 	idev->buffers = RB_ROOT;
@@ -1854,6 +1862,7 @@
 void ion_device_destroy(struct ion_device *dev)
 {
 	misc_deregister(&dev->dev);
+	debugfs_remove_recursive(dev->debug_root);
 	/* XXX need to free the heaps and clients ? */
 	kfree(dev);
 }
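Assuming debugfs is mounted in the usual place, the resulting layout is /sys/kernel/debug/ion/heaps/<heap name> for per-heap state (plus a <heap name>_shrink control when DEBUG_HEAP_SHRINKER is defined) and /sys/kernel/debug/ion/clients/<name>-<serial> for per-client state, and ion_device_destroy() now removes the whole tree in one go via debugfs_remove_recursive().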
diff --git a/drivers/gpu/ion/ion_carveout_heap.c b/drivers/gpu/ion/ion_carveout_heap.c
index 08921299..d25e928 100644
--- a/drivers/gpu/ion/ion_carveout_heap.c
+++ b/drivers/gpu/ion/ion_carveout_heap.c
@@ -162,7 +162,7 @@
 }
 
 static int ion_carveout_print_debug(struct ion_heap *heap, struct seq_file *s,
-				    const struct rb_root *mem_map)
+				    const struct list_head *mem_map)
 {
 	struct ion_carveout_heap *carveout_heap =
 		container_of(heap, struct ion_carveout_heap, heap);
@@ -176,16 +176,14 @@
 		unsigned long size = carveout_heap->total_size;
 		unsigned long end = base+size;
 		unsigned long last_end = base;
-		struct rb_node *n;
+		struct mem_map_data *data;
 
 		seq_printf(s, "\nMemory Map\n");
 		seq_printf(s, "%16.s %14.s %14.s %14.s\n",
 			   "client", "start address", "end address",
 			   "size (hex)");
 
-		for (n = rb_first(mem_map); n; n = rb_next(n)) {
-			struct mem_map_data *data =
-					rb_entry(n, struct mem_map_data, node);
+		list_for_each_entry(data, mem_map, node) {
 			const char *client_name = "(null)";
 
 			if (last_end < data->addr) {
diff --git a/drivers/gpu/ion/ion_cma_heap.c b/drivers/gpu/ion/ion_cma_heap.c
index b24b2bd..d4bbab7 100644
--- a/drivers/gpu/ion/ion_cma_heap.c
+++ b/drivers/gpu/ion/ion_cma_heap.c
@@ -180,19 +180,17 @@
 }
 
 static int ion_cma_print_debug(struct ion_heap *heap, struct seq_file *s,
-			const struct rb_root *mem_map)
+			const struct list_head *mem_map)
 {
 	if (mem_map) {
-		struct rb_node *n;
+		struct mem_map_data *data;
 
 		seq_printf(s, "\nMemory Map\n");
 		seq_printf(s, "%16.s %14.s %14.s %14.s\n",
 			   "client", "start address", "end address",
 			   "size (hex)");
 
-		for (n = rb_first(mem_map); n; n = rb_next(n)) {
-			struct mem_map_data *data =
-					rb_entry(n, struct mem_map_data, node);
+		list_for_each_entry(data, mem_map, node) {
 			const char *client_name = "(null)";
 
 
diff --git a/drivers/gpu/ion/ion_cma_secure_heap.c b/drivers/gpu/ion/ion_cma_secure_heap.c
index 90451ca..bdf48b3 100644
--- a/drivers/gpu/ion/ion_cma_secure_heap.c
+++ b/drivers/gpu/ion/ion_cma_secure_heap.c
@@ -234,19 +234,17 @@
 }
 
 static int ion_secure_cma_print_debug(struct ion_heap *heap, struct seq_file *s,
-			const struct rb_root *mem_map)
+			const struct list_head *mem_map)
 {
 	if (mem_map) {
-		struct rb_node *n;
+		struct mem_map_data *data;
 
 		seq_printf(s, "\nMemory Map\n");
 		seq_printf(s, "%16.s %14.s %14.s %14.s\n",
 			   "client", "start address", "end address",
 			   "size (hex)");
 
-		for (n = rb_first(mem_map); n; n = rb_next(n)) {
-			struct mem_map_data *data =
-					rb_entry(n, struct mem_map_data, node);
+		list_for_each_entry(data, mem_map, node) {
 			const char *client_name = "(null)";
 
 
diff --git a/drivers/gpu/ion/ion_cp_heap.c b/drivers/gpu/ion/ion_cp_heap.c
index f2f4fad..8cb90e5 100644
--- a/drivers/gpu/ion/ion_cp_heap.c
+++ b/drivers/gpu/ion/ion_cp_heap.c
@@ -622,7 +622,7 @@
 }
 
 static int ion_cp_print_debug(struct ion_heap *heap, struct seq_file *s,
-			      const struct rb_root *mem_map)
+			      const struct list_head *mem_map)
 {
 	unsigned long total_alloc;
 	unsigned long total_size;
@@ -651,16 +651,14 @@
 		unsigned long size = cp_heap->total_size;
 		unsigned long end = base+size;
 		unsigned long last_end = base;
-		struct rb_node *n;
+		struct mem_map_data *data;
 
 		seq_printf(s, "\nMemory Map\n");
 		seq_printf(s, "%16.s %14.s %14.s %14.s\n",
 			   "client", "start address", "end address",
 			   "size (hex)");
 
-		for (n = rb_first(mem_map); n; n = rb_next(n)) {
-			struct mem_map_data *data =
-					rb_entry(n, struct mem_map_data, node);
+		list_for_each_entry(data, mem_map, node) {
 			const char *client_name = "(null)";
 
 			if (last_end < data->addr) {
diff --git a/drivers/gpu/ion/ion_heap.c b/drivers/gpu/ion/ion_heap.c
index 3d37541..9d33bf4 100644
--- a/drivers/gpu/ion/ion_heap.c
+++ b/drivers/gpu/ion/ion_heap.c
@@ -16,10 +16,16 @@
  */
 
 #include <linux/err.h>
+#include <linux/freezer.h>
 #include <linux/ion.h>
+#include <linux/kthread.h>
 #include <linux/mm.h>
+#include <linux/rtmutex.h>
+#include <linux/sched.h>
 #include <linux/scatterlist.h>
 #include <linux/vmalloc.h>
+#include <linux/slab.h>
+#include <linux/highmem.h>
 #include "ion_priv.h"
 
 void *ion_heap_map_kernel(struct ion_heap *heap,
@@ -94,9 +100,155 @@
 	return 0;
 }
 
+#define MAX_VMAP_RETRIES 10
+
+/**
+ * An optimized page-zero'ing function. vmaps arrays of pages in large
+ * chunks to minimize the number of memsets and vmaps/vunmaps.
+ *
+ * Note that the `pages' array should be composed of all 4K pages.
+ */
+int ion_heap_pages_zero(struct page **pages, int num_pages,
+				bool should_invalidate)
+{
+	int i, j, k, npages_to_vmap;
+	void *ptr = NULL;
+	/*
+	 * It's cheaper just to use writecombine memory and skip the
+	 * cache vs. using a cache memory and trying to flush it afterwards
+	 */
+	pgprot_t pgprot = pgprot_writecombine(pgprot_kernel);
+
+	/*
+	 * As an optimization, we manually zero out all of the pages
+	 * in one fell swoop here. To safeguard against insufficient
+	 * vmalloc space, we only vmap `npages_to_vmap' at a time,
+	 * starting with a conservative estimate of 1/8 of the total
+	 * number of vmalloc pages available.
+	 */
+	npages_to_vmap = ((VMALLOC_END - VMALLOC_START)/8)
+			>> PAGE_SHIFT;
+	for (i = 0; i < num_pages; i += npages_to_vmap) {
+		npages_to_vmap = min(npages_to_vmap, num_pages - i);
+		for (j = 0; j < MAX_VMAP_RETRIES && npages_to_vmap;
+			++j) {
+			ptr = vmap(&pages[i], npages_to_vmap,
+					VM_IOREMAP, pgprot);
+			if (ptr)
+				break;
+			else
+				npages_to_vmap >>= 1;
+		}
+		if (!ptr)
+			return -ENOMEM;
+
+		memset(ptr, 0, npages_to_vmap * PAGE_SIZE);
+		if (should_invalidate) {
+			/*
+			 * invalidate the cache to pick up the zeroing
+			 */
+			for (k = 0; k < npages_to_vmap; k++) {
+				void *p = kmap_atomic(pages[i + k]);
+				phys_addr_t phys = page_to_phys(
+							pages[i + k]);
+
+				dmac_inv_range(p, p + PAGE_SIZE);
+				outer_inv_range(phys, phys + PAGE_SIZE);
+				kunmap_atomic(p);
+			}
+		}
+		vunmap(ptr);
+	}
+
+	return 0;
+}
+
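A worked example of the chunking above, with the caveat that the vmalloc window is platform dependent so the numbers are purely illustrative: given a 240 MB vmalloc region and 4 KB pages, the first pass maps (240 MB / 8) / 4 KB = 7680 pages, i.e. roughly 30 MB is zeroed per vmap()/vunmap() cycle, and every failed vmap() halves the chunk, giving up after MAX_VMAP_RETRIES attempts.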
+static int ion_heap_alloc_pages_mem(int page_tbl_size,
+				struct pages_mem *pages_mem)
+{
+	struct page **pages;
+	pages_mem->free_fn = kfree;
+	if (page_tbl_size > SZ_8K) {
+		/*
+		 * Do fallback to ensure we have a balance between
+		 * performance and availability.
+		 */
+		pages = kmalloc(page_tbl_size,
+				__GFP_COMP | __GFP_NORETRY |
+				__GFP_NO_KSWAPD | __GFP_NOWARN);
+		if (!pages) {
+			pages = vmalloc(page_tbl_size);
+			pages_mem->free_fn = vfree;
+		}
+	} else {
+		pages = kmalloc(page_tbl_size, GFP_KERNEL);
+	}
+
+	if (!pages)
+		return -ENOMEM;
+
+	pages_mem->pages = pages;
+	return 0;
+}
+
+static void ion_heap_free_pages_mem(struct pages_mem *pages_mem)
+{
+	pages_mem->free_fn(pages_mem->pages);
+}
+
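The allocator above is the familiar kmalloc-with-vmalloc-fallback pattern (what later kernels package as kvmalloc()/kvfree()), with free_fn recorded so callers need not care which allocator won. On a kernel that has those helpers -- not this tree -- a rough equivalent would be:

	/* hypothetical: kvmalloc()/kvfree() are only available on later kernels */
	pages_mem->pages = kvmalloc(page_tbl_size, GFP_KERNEL);
	/* ... use pages_mem->pages ... */
	kvfree(pages_mem->pages);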
+int ion_heap_high_order_page_zero(struct page *page,
+				int order, bool should_invalidate)
+{
+	int i, ret;
+	struct pages_mem pages_mem;
+	int npages = 1 << order;
+	int page_tbl_size = sizeof(struct page *) * npages;
+
+	if (ion_heap_alloc_pages_mem(page_tbl_size, &pages_mem))
+		return -ENOMEM;
+
+	for (i = 0; i < (1 << order); ++i)
+		pages_mem.pages[i] = page + i;
+
+	ret = ion_heap_pages_zero(pages_mem.pages, npages,
+				should_invalidate);
+	ion_heap_free_pages_mem(&pages_mem);
+	return ret;
+}
+
 int ion_heap_buffer_zero(struct ion_buffer *buffer)
 {
 	struct sg_table *table = buffer->sg_table;
+	struct scatterlist *sg;
+	int i, j, ret = 0, npages = 0, page_tbl_size = 0;
+	struct pages_mem pages_mem;
+
+	for_each_sg(table->sgl, sg, table->nents, i) {
+		unsigned long len = sg_dma_len(sg);
+		int nrpages = len >> PAGE_SHIFT;
+		page_tbl_size += sizeof(struct page *) * nrpages;
+	}
+
+	if (ion_heap_alloc_pages_mem(page_tbl_size, &pages_mem))
+		return -ENOMEM;
+
+	for_each_sg(table->sgl, sg, table->nents, i) {
+		struct page *page = sg_page(sg);
+		unsigned long len = sg_dma_len(sg);
+
+		for (j = 0; j < len / PAGE_SIZE; j++)
+			pages_mem.pages[npages++] = page + j;
+	}
+
+	ret = ion_heap_pages_zero(pages_mem.pages, npages,
+				ion_buffer_cached(buffer));
+	ion_heap_free_pages_mem(&pages_mem);
+	return ret;
+}
+
+int ion_heap_buffer_zero_old(struct ion_buffer *buffer)
+{
+	struct sg_table *table = buffer->sg_table;
 	pgprot_t pgprot;
 	struct scatterlist *sg;
 	struct vm_struct *vm_struct;
@@ -131,6 +283,122 @@
 	return ret;
 }
 
+void ion_heap_free_page(struct ion_buffer *buffer, struct page *page,
+		       unsigned int order)
+{
+	int i;
+
+	if (!ion_buffer_fault_user_mappings(buffer)) {
+		__free_pages(page, order);
+		return;
+	}
+	for (i = 0; i < (1 << order); i++)
+		__free_page(page + i);
+}
+
+void ion_heap_freelist_add(struct ion_heap *heap, struct ion_buffer *buffer)
+{
+	rt_mutex_lock(&heap->lock);
+	list_add(&buffer->list, &heap->free_list);
+	heap->free_list_size += buffer->size;
+	rt_mutex_unlock(&heap->lock);
+	wake_up(&heap->waitqueue);
+}
+
+size_t ion_heap_freelist_size(struct ion_heap *heap)
+{
+	size_t size;
+
+	rt_mutex_lock(&heap->lock);
+	size = heap->free_list_size;
+	rt_mutex_unlock(&heap->lock);
+
+	return size;
+}
+
+static size_t _ion_heap_freelist_drain(struct ion_heap *heap, size_t size,
+				bool skip_pools)
+{
+	struct ion_buffer *buffer, *tmp;
+	size_t total_drained = 0;
+
+	if (ion_heap_freelist_size(heap) == 0)
+		return 0;
+
+	rt_mutex_lock(&heap->lock);
+	if (size == 0)
+		size = heap->free_list_size;
+
+	list_for_each_entry_safe(buffer, tmp, &heap->free_list, list) {
+		if (total_drained >= size)
+			break;
+		list_del(&buffer->list);
+		heap->free_list_size -= buffer->size;
+		if (skip_pools)
+			buffer->flags |= ION_FLAG_FREED_FROM_SHRINKER;
+		total_drained += buffer->size;
+		ion_buffer_destroy(buffer);
+	}
+	rt_mutex_unlock(&heap->lock);
+
+	return total_drained;
+}
+
+size_t ion_heap_freelist_drain(struct ion_heap *heap, size_t size)
+{
+	return _ion_heap_freelist_drain(heap, size, false);
+}
+
+size_t ion_heap_freelist_drain_from_shrinker(struct ion_heap *heap, size_t size)
+{
+	return _ion_heap_freelist_drain(heap, size, true);
+}
+
+int ion_heap_deferred_free(void *data)
+{
+	struct ion_heap *heap = data;
+
+	while (true) {
+		struct ion_buffer *buffer;
+
+		wait_event_freezable(heap->waitqueue,
+				     ion_heap_freelist_size(heap) > 0);
+
+		rt_mutex_lock(&heap->lock);
+		if (list_empty(&heap->free_list)) {
+			rt_mutex_unlock(&heap->lock);
+			continue;
+		}
+		buffer = list_first_entry(&heap->free_list, struct ion_buffer,
+					  list);
+		list_del(&buffer->list);
+		heap->free_list_size -= buffer->size;
+		rt_mutex_unlock(&heap->lock);
+		ion_buffer_destroy(buffer);
+	}
+
+	return 0;
+}
+
+int ion_heap_init_deferred_free(struct ion_heap *heap)
+{
+	struct sched_param param = { .sched_priority = 0 };
+
+	INIT_LIST_HEAD(&heap->free_list);
+	heap->free_list_size = 0;
+	rt_mutex_init(&heap->lock);
+	init_waitqueue_head(&heap->waitqueue);
+	heap->task = kthread_run(ion_heap_deferred_free, heap,
+				 "%s", heap->name);
+	sched_setscheduler(heap->task, SCHED_IDLE, &param);
+	if (IS_ERR(heap->task)) {
+		pr_err("%s: creating thread for deferred free failed\n",
+		       __func__);
+		return PTR_RET(heap->task);
+	}
+	return 0;
+}
+
 struct ion_heap *ion_heap_create(struct ion_platform_heap *heap_data)
 {
 	struct ion_heap *heap = NULL;
diff --git a/drivers/gpu/ion/ion_iommu_heap.c b/drivers/gpu/ion/ion_iommu_heap.c
deleted file mode 100644
index d9e9e09..0000000
--- a/drivers/gpu/ion/ion_iommu_heap.c
+++ /dev/null
@@ -1,588 +0,0 @@
-/*
- * Copyright (c) 2011-2013, The Linux Foundation. All rights reserved.
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License version 2 and
- * only version 2 as published by the Free Software Foundation.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- */
-#include <linux/err.h>
-#include <linux/io.h>
-#include <linux/msm_ion.h>
-#include <linux/mm.h>
-#include <linux/highmem.h>
-#include <linux/scatterlist.h>
-#include <linux/slab.h>
-#include <linux/vmalloc.h>
-#include <linux/iommu.h>
-#include <linux/pfn.h>
-#include <linux/dma-mapping.h>
-#include "ion_priv.h"
-
-#include <asm/mach/map.h>
-#include <asm/page.h>
-#include <asm/cacheflush.h>
-#include <mach/iommu_domains.h>
-#include <trace/events/kmem.h>
-
-struct ion_iommu_heap {
-	struct ion_heap heap;
-	struct ion_page_pool **cached_pools;
-	struct ion_page_pool **uncached_pools;
-};
-
-/*
- * We will attempt to allocate high-order pages and store those in an
- * sg_list. However, some APIs expect an array of struct page * where
- * each page is of size PAGE_SIZE. We use this extra structure to
- * carry around an array of such pages (derived from the high-order
- * pages with nth_page).
- */
-struct ion_iommu_priv_data {
-	struct page **pages;
-	unsigned int pages_uses_vmalloc;
-	int nrpages;
-	unsigned long size;
-};
-
-#define MAX_VMAP_RETRIES 10
-#define BAD_ORDER	-1
-
-static const unsigned int orders[] = {9, 8, 4, 0};
-static const int num_orders = ARRAY_SIZE(orders);
-static unsigned int low_gfp_flags = __GFP_HIGHMEM | GFP_KERNEL | __GFP_ZERO;
-static unsigned int high_gfp_flags = (__GFP_HIGHMEM | __GFP_NORETRY
-				| __GFP_NO_KSWAPD | __GFP_NOWARN |
-				 __GFP_IO | __GFP_FS | __GFP_ZERO);
-
-struct page_info {
-	struct page *page;
-	unsigned int order;
-	struct list_head list;
-};
-
-static int order_to_index(unsigned int order)
-{
-	int i;
-	for (i = 0; i < num_orders; i++)
-		if (order == orders[i])
-			return i;
-	BUG();
-	return BAD_ORDER;
-}
-
-static unsigned int order_to_size(int order)
-{
-	return PAGE_SIZE << order;
-}
-
-static struct page_info *alloc_largest_available(struct ion_iommu_heap *heap,
-						unsigned long size,
-						unsigned int max_order,
-						unsigned long flags)
-{
-	struct page *page;
-	struct page_info *info;
-	int i;
-
-	for (i = 0; i < num_orders; i++) {
-		gfp_t gfp;
-		int idx = order_to_index(orders[i]);
-		struct ion_page_pool *pool;
-
-		if (idx == BAD_ORDER)
-			continue;
-
-		if (ION_IS_CACHED(flags)) {
-			pool = heap->cached_pools[idx];
-			BUG_ON(!pool);
-		} else {
-			pool = heap->uncached_pools[idx];
-			BUG_ON(!pool);
-		}
-
-		if (size < order_to_size(orders[i]))
-			continue;
-		if (max_order < orders[i])
-			continue;
-
-		if (orders[i]) {
-			gfp = high_gfp_flags;
-		} else {
-			gfp = low_gfp_flags;
-		}
-		trace_alloc_pages_iommu_start(gfp, orders[i]);
-		if (flags & ION_FLAG_POOL_FORCE_ALLOC)
-			page = alloc_pages(gfp, orders[i]);
-		else
-			page = ion_page_pool_alloc(pool);
-		trace_alloc_pages_iommu_end(gfp, orders[i]);
-		if (!page) {
-			trace_alloc_pages_iommu_fail(gfp, orders[i]);
-			continue;
-		}
-
-		info = kmalloc(sizeof(struct page_info), GFP_KERNEL);
-		if (info) {
-			info->page = page;
-			info->order = orders[i];
-		}
-		return info;
-	}
-	return NULL;
-}
-
-static int ion_iommu_buffer_zero(struct ion_iommu_priv_data *data,
-				bool is_cached)
-{
-	int i, j, k;
-	unsigned int npages_to_vmap;
-	unsigned int total_pages;
-	void *ptr = NULL;
-	/*
-	 * It's cheaper just to use writecombine memory and skip the
-	 * cache vs. using a cache memory and trying to flush it afterwards
-	 */
-	pgprot_t pgprot = pgprot_writecombine(pgprot_kernel);
-
-	/*
-	 * As an optimization, we manually zero out all of the
-	 * pages in one fell swoop here. To safeguard against
-	 * insufficient vmalloc space, we only vmap
-	 * `npages_to_vmap' at a time, starting with a
-	 * conservative estimate of 1/8 of the total number of
-	 * vmalloc pages available. Note that the `pages'
-	 * array is composed of all 4K pages, irrespective of
-	 * the size of the pages on the sg list.
-	 */
-	npages_to_vmap = ((VMALLOC_END - VMALLOC_START)/8)
-			>> PAGE_SHIFT;
-	total_pages = data->nrpages;
-	for (i = 0; i < total_pages; i += npages_to_vmap) {
-		npages_to_vmap = min(npages_to_vmap, total_pages - i);
-		for (j = 0; j < MAX_VMAP_RETRIES && npages_to_vmap;
-			++j) {
-			ptr = vmap(&data->pages[i], npages_to_vmap,
-					VM_IOREMAP, pgprot);
-			if (ptr)
-				break;
-			else
-				npages_to_vmap >>= 1;
-		}
-		if (!ptr)
-			return -ENOMEM;
-
-		memset(ptr, 0, npages_to_vmap * PAGE_SIZE);
-		if (is_cached) {
-			/*
-			 * invalidate the cache to pick up the zeroing
-			 */
-			for (k = 0; k < npages_to_vmap; k++) {
-				void *p = kmap_atomic(data->pages[i + k]);
-				phys_addr_t phys = page_to_phys(
-							data->pages[i + k]);
-
-				dmac_inv_range(p, p + PAGE_SIZE);
-				outer_inv_range(phys, phys + PAGE_SIZE);
-				kunmap_atomic(p);
-			}
-		}
-		vunmap(ptr);
-	}
-
-	return 0;
-}
-
-static int ion_iommu_heap_allocate(struct ion_heap *heap,
-				      struct ion_buffer *buffer,
-				      unsigned long size, unsigned long align,
-				      unsigned long flags)
-{
-	int ret, i;
-	struct list_head pages_list;
-	struct page_info *info, *tmp_info;
-	struct ion_iommu_priv_data *data = NULL;
-	struct ion_iommu_heap *iommu_heap =
-		container_of(heap, struct ion_iommu_heap, heap);
-
-	if (msm_use_iommu()) {
-		struct scatterlist *sg;
-		struct sg_table *table;
-		int j;
-		unsigned int num_large_pages = 0;
-		unsigned long size_remaining = PAGE_ALIGN(size);
-		unsigned int max_order = ION_IS_CACHED(flags) ? 0 : orders[0];
-		unsigned int page_tbl_size;
-
-		data = kmalloc(sizeof(*data), GFP_KERNEL);
-		if (!data)
-			return -ENOMEM;
-
-		INIT_LIST_HEAD(&pages_list);
-		while (size_remaining > 0) {
-			info = alloc_largest_available(iommu_heap,
-						size_remaining,
-						max_order,
-						flags);
-			if (!info) {
-				ret = -ENOMEM;
-				goto err_free_data;
-			}
-			list_add_tail(&info->list, &pages_list);
-			size_remaining -= order_to_size(info->order);
-			max_order = info->order;
-			num_large_pages++;
-		}
-
-		data->size = PFN_ALIGN(size);
-		data->nrpages = data->size >> PAGE_SHIFT;
-		data->pages_uses_vmalloc = 0;
-		page_tbl_size = sizeof(struct page *) * data->nrpages;
-
-		if (page_tbl_size > SZ_8K) {
-			/*
-			 * Do fallback to ensure we have a balance between
-			 * performance and availability.
-			 */
-			data->pages = kmalloc(page_tbl_size,
-					      __GFP_COMP | __GFP_NORETRY |
-					      __GFP_NO_KSWAPD | __GFP_NOWARN);
-			if (!data->pages) {
-				data->pages = vmalloc(page_tbl_size);
-				data->pages_uses_vmalloc = 1;
-			}
-		} else {
-			data->pages = kmalloc(page_tbl_size, GFP_KERNEL);
-		}
-		if (!data->pages) {
-			ret = -ENOMEM;
-			goto err_free_data;
-		}
-
-		table = buffer->sg_table =
-				kzalloc(sizeof(struct sg_table), GFP_KERNEL);
-
-		if (!table) {
-			ret = -ENOMEM;
-			goto err1;
-		}
-		ret = sg_alloc_table(table, num_large_pages, GFP_KERNEL);
-		if (ret)
-			goto err2;
-
-		i = 0;
-		sg = table->sgl;
-		list_for_each_entry_safe(info, tmp_info, &pages_list, list) {
-			struct page *page = info->page;
-			sg_set_page(sg, page, order_to_size(info->order), 0);
-			sg_dma_address(sg) = sg_phys(sg);
-			sg = sg_next(sg);
-			for (j = 0; j < (1 << info->order); ++j)
-				data->pages[i++] = nth_page(page, j);
-			list_del(&info->list);
-			kfree(info);
-		}
-
-
-		if (flags & ION_FLAG_POOL_FORCE_ALLOC) {
-			ret = ion_iommu_buffer_zero(data, ION_IS_CACHED(flags));
-			if (ret) {
-				pr_err("Couldn't vmap the pages for zeroing\n");
-				goto err3;
-			}
-
-
-			if (!ION_IS_CACHED(flags))
-				dma_sync_sg_for_device(NULL, table->sgl,
-						table->nents,
-						DMA_BIDIRECTIONAL);
-
-		}
-		buffer->priv_virt = data;
-		return 0;
-
-	} else {
-		return -ENOMEM;
-	}
-
-
-err3:
-	sg_free_table(buffer->sg_table);
-err2:
-	kfree(buffer->sg_table);
-	buffer->sg_table = 0;
-err1:
-	if (data->pages_uses_vmalloc)
-		vfree(data->pages);
-	else
-		kfree(data->pages);
-err_free_data:
-	kfree(data);
-
-	list_for_each_entry_safe(info, tmp_info, &pages_list, list) {
-		if (info->page)
-			__free_pages(info->page, info->order);
-		list_del(&info->list);
-		kfree(info);
-	}
-	return ret;
-}
-
-static void ion_iommu_heap_free(struct ion_buffer *buffer)
-{
-	int i;
-	struct scatterlist *sg;
-	struct sg_table *table = buffer->sg_table;
-	struct ion_iommu_priv_data *data = buffer->priv_virt;
-	bool cached = ion_buffer_cached(buffer);
-	struct ion_iommu_heap *iommu_heap =
-	     container_of(buffer->heap, struct	ion_iommu_heap, heap);
-
-	if (!table)
-		return;
-	if (!data)
-		return;
-
-	if (!(buffer->flags & ION_FLAG_POOL_FORCE_ALLOC))
-		ion_iommu_buffer_zero(data, ION_IS_CACHED(buffer->flags));
-
-	for_each_sg(table->sgl, sg, table->nents, i) {
-		int order = get_order(sg_dma_len(sg));
-		int idx = order_to_index(order);
-		struct ion_page_pool *pool;
-
-		if (idx == BAD_ORDER) {
-			WARN_ON(1);
-			continue;
-		}
-
-		if (cached)
-			pool = iommu_heap->cached_pools[idx];
-		else
-			pool = iommu_heap->uncached_pools[idx];
-
-		if (buffer->flags & ION_FLAG_POOL_FORCE_ALLOC)
-			__free_pages(sg_page(sg), order);
-		else
-			ion_page_pool_free(pool, sg_page(sg));
-	}
-
-	sg_free_table(table);
-	kfree(table);
-	table = 0;
-	if (data->pages_uses_vmalloc)
-		vfree(data->pages);
-	else
-		kfree(data->pages);
-	kfree(data);
-}
-
-void *ion_iommu_heap_map_kernel(struct ion_heap *heap,
-				struct ion_buffer *buffer)
-{
-	struct ion_iommu_priv_data *data = buffer->priv_virt;
-	pgprot_t page_prot = PAGE_KERNEL;
-
-	if (!data)
-		return NULL;
-
-	if (!ION_IS_CACHED(buffer->flags))
-		page_prot = pgprot_writecombine(page_prot);
-
-	buffer->vaddr = vmap(data->pages, data->nrpages, VM_IOREMAP, page_prot);
-
-	return buffer->vaddr;
-}
-
-void ion_iommu_heap_unmap_kernel(struct ion_heap *heap,
-				    struct ion_buffer *buffer)
-{
-	if (!buffer->vaddr)
-		return;
-
-	vunmap(buffer->vaddr);
-	buffer->vaddr = NULL;
-}
-
-int ion_iommu_heap_map_user(struct ion_heap *heap, struct ion_buffer *buffer,
-			       struct vm_area_struct *vma)
-{
-	struct sg_table *table = buffer->sg_table;
-	unsigned long addr = vma->vm_start;
-	unsigned long offset = vma->vm_pgoff * PAGE_SIZE;
-	struct scatterlist *sg;
-	int i;
-
-	if (!ION_IS_CACHED(buffer->flags))
-		vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);
-
-	for_each_sg(table->sgl, sg, table->nents, i) {
-		struct page *page = sg_page(sg);
-		unsigned long remainder = vma->vm_end - addr;
-		unsigned long len = sg_dma_len(sg);
-
-		if (offset >= sg_dma_len(sg)) {
-			offset -= sg_dma_len(sg);
-			continue;
-		} else if (offset) {
-			page += offset / PAGE_SIZE;
-			len = sg_dma_len(sg) - offset;
-			offset = 0;
-		}
-		len = min(len, remainder);
-		remap_pfn_range(vma, addr, page_to_pfn(page), len,
-				vma->vm_page_prot);
-		addr += len;
-		if (addr >= vma->vm_end)
-			return 0;
-	}
-	return 0;
-}
-
-static struct sg_table *ion_iommu_heap_map_dma(struct ion_heap *heap,
-					      struct ion_buffer *buffer)
-{
-	return buffer->sg_table;
-}
-
-static void ion_iommu_heap_unmap_dma(struct ion_heap *heap,
-				 struct ion_buffer *buffer)
-{
-}
-
-static int ion_iommu_heap_debug_show(struct ion_heap *heap, struct seq_file *s,
-					void *unused)
-{
-
-	struct ion_iommu_heap *iommu_heap = container_of(heap,
-							struct ion_iommu_heap,
-							heap);
-	int i;
-	unsigned long total = 0;
-
-	seq_printf(s, "Cached Pools:\n");
-	for (i = 0; i < num_orders; i++) {
-		struct ion_page_pool *pool = iommu_heap->cached_pools[i];
-		seq_printf(s, "%d order %u highmem pages in pool = %lx total\n",
-			   pool->high_count, pool->order,
-			   (1 << pool->order) * PAGE_SIZE * pool->high_count);
-		seq_printf(s, "%d order %u lowmem pages in pool = %lx total\n",
-			   pool->low_count, pool->order,
-			   (1 << pool->order) * PAGE_SIZE * pool->low_count);
-
-		total += (1 << pool->order) * PAGE_SIZE *
-			  (pool->low_count + pool->high_count);
-	}
-
-	seq_printf(s, "Uncached Pools:\n");
-	for (i = 0; i < num_orders; i++) {
-		struct ion_page_pool *pool = iommu_heap->uncached_pools[i];
-		seq_printf(s, "%d order %u highmem pages in pool = %lx total\n",
-			   pool->high_count, pool->order,
-			   (1 << pool->order) * PAGE_SIZE * pool->high_count);
-		seq_printf(s, "%d order %u lowmem pages in pool = %lx total\n",
-			   pool->low_count, pool->order,
-			   (1 << pool->order) * PAGE_SIZE * pool->low_count);
-
-		total += (1 << pool->order) * PAGE_SIZE *
-			  (pool->low_count + pool->high_count);
-	}
-	seq_printf(s, "Total bytes in pool: %lx\n", total);
-	return 0;
-}
-
-static struct ion_heap_ops iommu_heap_ops = {
-	.allocate = ion_iommu_heap_allocate,
-	.free = ion_iommu_heap_free,
-	.map_user = ion_iommu_heap_map_user,
-	.map_kernel = ion_iommu_heap_map_kernel,
-	.unmap_kernel = ion_iommu_heap_unmap_kernel,
-	.map_dma = ion_iommu_heap_map_dma,
-	.unmap_dma = ion_iommu_heap_unmap_dma,
-};
-
-struct ion_heap *ion_iommu_heap_create(struct ion_platform_heap *heap_data)
-{
-	struct ion_iommu_heap *iommu_heap;
-	int i;
-
-	iommu_heap = kzalloc(sizeof(struct ion_iommu_heap), GFP_KERNEL);
-	if (!iommu_heap)
-		return ERR_PTR(-ENOMEM);
-
-	iommu_heap->heap.ops = &iommu_heap_ops;
-	iommu_heap->heap.type = ION_HEAP_TYPE_IOMMU;
-	iommu_heap->uncached_pools = kzalloc(
-			      sizeof(struct ion_page_pool *) * num_orders,
-			      GFP_KERNEL);
-	if (!iommu_heap->uncached_pools)
-		goto err_alloc_uncached_pools;
-
-	iommu_heap->cached_pools = kzalloc(
-			      sizeof(struct ion_page_pool *) * num_orders,
-			      GFP_KERNEL);
-
-	if (!iommu_heap->cached_pools)
-		goto err_alloc_cached_pools;
-
-	for (i = 0; i < num_orders; i++) {
-		struct ion_page_pool *pool;
-		gfp_t gfp_flags;
-
-		if (orders[i])
-			gfp_flags = high_gfp_flags | __GFP_ZERO;
-		else
-			gfp_flags = low_gfp_flags | __GFP_ZERO;
-		pool = ion_page_pool_create(gfp_flags, orders[i]);
-		if (!pool)
-			goto err_create_cached_pool;
-		iommu_heap->cached_pools[i] = pool;
-	}
-
-	for (i = 0; i < num_orders; i++) {
-		struct ion_page_pool *pool;
-		gfp_t gfp_flags;
-
-		if (orders[i])
-			gfp_flags = high_gfp_flags | __GFP_ZERO;
-		else
-			gfp_flags = low_gfp_flags | __GFP_ZERO;
-		pool = ion_page_pool_create(gfp_flags, orders[i]);
-		if (!pool)
-			goto err_create_uncached_pool;
-		iommu_heap->uncached_pools[i] = pool;
-	}
-	iommu_heap->heap.debug_show = ion_iommu_heap_debug_show;
-	return &iommu_heap->heap;
-
-err_create_uncached_pool:
-	for (i = 0; i < num_orders; i++)
-		if (iommu_heap->cached_pools[i])
-			ion_page_pool_destroy(iommu_heap->uncached_pools[i]);
-
-
-err_create_cached_pool:
-	for (i = 0; i < num_orders; i++)
-		if (iommu_heap->uncached_pools[i])
-			ion_page_pool_destroy(iommu_heap->cached_pools[i]);
-
-	kfree(iommu_heap->cached_pools);
-err_alloc_cached_pools:
-	kfree(iommu_heap->uncached_pools);
-err_alloc_uncached_pools:
-	kfree(iommu_heap);
-	return ERR_PTR(-ENOMEM);
-}
-
-void ion_iommu_heap_destroy(struct ion_heap *heap)
-{
-	struct ion_iommu_heap *iommu_heap =
-	     container_of(heap, struct  ion_iommu_heap, heap);
-
-	kfree(iommu_heap);
-	iommu_heap = NULL;
-}
diff --git a/drivers/gpu/ion/ion_page_pool.c b/drivers/gpu/ion/ion_page_pool.c
index 495dd24..94f9445 100644
--- a/drivers/gpu/ion/ion_page_pool.c
+++ b/drivers/gpu/ion/ion_page_pool.c
@@ -21,14 +21,9 @@
 #include <linux/list.h>
 #include <linux/module.h>
 #include <linux/slab.h>
-#include <linux/shrinker.h>
+#include <linux/vmalloc.h>
 #include "ion_priv.h"
 
-/* #define DEBUG_PAGE_POOL_SHRINKER */
-
-static struct plist_head pools = PLIST_HEAD_INIT(pools);
-static struct shrinker shrinker;
-
 struct ion_page_pool_item {
 	struct page *page;
 	struct list_head list;
@@ -36,18 +31,28 @@
 
 static void *ion_page_pool_alloc_pages(struct ion_page_pool *pool)
 {
-	struct page *page = alloc_pages(pool->gfp_mask, pool->order);
+	struct page *page;
 	struct scatterlist sg;
 
+	page = alloc_pages(pool->gfp_mask & ~__GFP_ZERO, pool->order);
+
 	if (!page)
 		return NULL;
 
+	if (pool->gfp_mask & __GFP_ZERO)
+		if (ion_heap_high_order_page_zero(
+				page, pool->order, pool->should_invalidate))
+			goto error_free_pages;
+
 	sg_init_table(&sg, 1);
 	sg_set_page(&sg, page, PAGE_SIZE << pool->order, 0);
 	sg_dma_address(&sg) = sg_phys(&sg);
 	dma_sync_sg_for_device(NULL, &sg, 1, DMA_BIDIRECTIONAL);
 
 	return page;
+error_free_pages:
+	__free_pages(page, pool->order);
+	return NULL;
 }
 
 static void ion_page_pool_free_pages(struct ion_page_pool *pool,
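One consequence of moving the zeroing into the pool above: when the pool's gfp_mask carries __GFP_ZERO, pages are scrubbed as they are taken from the system, and pool->should_invalidate decides whether the CPU caches are invalidated afterwards so the zeroes are actually visible through a cached mapping. The system heap later in this patch creates its cached pools with should_invalidate = true and its uncached pools with false, the latter relying on the dma_sync_sg_for_device() call above instead.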
@@ -128,113 +133,50 @@
 		ion_page_pool_free_pages(pool, page);
 }
 
-#ifdef DEBUG_PAGE_POOL_SHRINKER
-static int debug_drop_pools_set(void *data, u64 val)
+static int ion_page_pool_total(struct ion_page_pool *pool, bool high)
 {
-	struct shrink_control sc;
-	int objs;
-
-	sc.gfp_mask = -1;
-	sc.nr_to_scan = 0;
-
-	if (!val)
-		return 0;
-
-	objs = shrinker.shrink(&shrinker, &sc);
-	sc.nr_to_scan = objs;
-
-	shrinker.shrink(&shrinker, &sc);
-	return 0;
-}
-
-static int debug_drop_pools_get(void *data, u64 *val)
-{
-	struct shrink_control sc;
-	int objs;
-
-	sc.gfp_mask = -1;
-	sc.nr_to_scan = 0;
-
-	objs = shrinker.shrink(&shrinker, &sc);
-	*val = objs;
-	return 0;
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(debug_drop_pools_fops, debug_drop_pools_get,
-                        debug_drop_pools_set, "%llu\n");
-
-static int debug_grow_pools_set(void *data, u64 val)
-{
-	struct ion_page_pool *pool;
-	struct page *page;
-
-	plist_for_each_entry(pool, &pools, list) {
-		if (val != pool->list.prio)
-			continue;
-		page = ion_page_pool_alloc_pages(pool);
-		if (page)
-			ion_page_pool_add(pool, page);
-	}
-
-	return 0;
-}
-
-DEFINE_SIMPLE_ATTRIBUTE(debug_grow_pools_fops, debug_drop_pools_get,
-			debug_grow_pools_set, "%llu\n");
-#endif
-
-static int ion_page_pool_total(bool high)
-{
-	struct ion_page_pool *pool;
 	int total = 0;
 
-	plist_for_each_entry(pool, &pools, list) {
-		total += high ? (pool->high_count + pool->low_count) *
-			(1 << pool->order) :
+	total += high ? (pool->high_count + pool->low_count) *
+		(1 << pool->order) :
 			pool->low_count * (1 << pool->order);
-	}
 	return total;
 }
 
-static int ion_page_pool_shrink(struct shrinker *shrinker,
-				 struct shrink_control *sc)
+int ion_page_pool_shrink(struct ion_page_pool *pool, gfp_t gfp_mask,
+				int nr_to_scan)
 {
-	struct ion_page_pool *pool;
 	int nr_freed = 0;
 	int i;
 	bool high;
-	int nr_to_scan = sc->nr_to_scan;
 
-	if (sc->gfp_mask & __GFP_HIGHMEM)
-		high = true;
+	high = gfp_mask & __GFP_HIGHMEM;
 
 	if (nr_to_scan == 0)
-		return ion_page_pool_total(high);
+		return ion_page_pool_total(pool, high);
 
-	plist_for_each_entry(pool, &pools, list) {
-		for (i = 0; i < nr_to_scan; i++) {
-			struct page *page;
+	for (i = 0; i < nr_to_scan; i++) {
+		struct page *page;
 
-			mutex_lock(&pool->mutex);
-			if (high && pool->high_count) {
-				page = ion_page_pool_remove(pool, true);
-			} else if (pool->low_count) {
-				page = ion_page_pool_remove(pool, false);
-			} else {
-				mutex_unlock(&pool->mutex);
-				break;
-			}
+		mutex_lock(&pool->mutex);
+		if (high && pool->high_count) {
+			page = ion_page_pool_remove(pool, true);
+		} else if (pool->low_count) {
+			page = ion_page_pool_remove(pool, false);
+		} else {
 			mutex_unlock(&pool->mutex);
-			ion_page_pool_free_pages(pool, page);
-			nr_freed += (1 << pool->order);
+			break;
 		}
-		nr_to_scan -= i;
+		mutex_unlock(&pool->mutex);
+		ion_page_pool_free_pages(pool, page);
+		nr_freed += (1 << pool->order);
 	}
 
-	return ion_page_pool_total(high);
+	return nr_freed;
 }
 
-struct ion_page_pool *ion_page_pool_create(gfp_t gfp_mask, unsigned int order)
+struct ion_page_pool *ion_page_pool_create(gfp_t gfp_mask, unsigned int order,
+	bool should_invalidate)
 {
 	struct ion_page_pool *pool = kmalloc(sizeof(struct ion_page_pool),
 					     GFP_KERNEL);
@@ -246,37 +188,25 @@
 	INIT_LIST_HEAD(&pool->high_items);
 	pool->gfp_mask = gfp_mask;
 	pool->order = order;
+	pool->should_invalidate = should_invalidate;
 	mutex_init(&pool->mutex);
 	plist_node_init(&pool->list, order);
-	plist_add(&pool->list, &pools);
 
 	return pool;
 }
 
 void ion_page_pool_destroy(struct ion_page_pool *pool)
 {
-	plist_del(&pool->list, &pools);
 	kfree(pool);
 }
 
 static int __init ion_page_pool_init(void)
 {
-	shrinker.shrink = ion_page_pool_shrink;
-	shrinker.seeks = DEFAULT_SEEKS;
-	shrinker.batch = 0;
-	register_shrinker(&shrinker);
-#ifdef DEBUG_PAGE_POOL_SHRINKER
-	debugfs_create_file("ion_pools_shrink", 0644, NULL, NULL,
-			    &debug_drop_pools_fops);
-	debugfs_create_file("ion_pools_grow", 0644, NULL, NULL,
-			    &debug_grow_pools_fops);
-#endif
 	return 0;
 }
 
 static void __exit ion_page_pool_exit(void)
 {
-	unregister_shrinker(&shrinker);
 }
 
 module_init(ion_page_pool_init);
diff --git a/drivers/gpu/ion/ion_priv.h b/drivers/gpu/ion/ion_priv.h
index e3fbbda..2b00ee6 100644
--- a/drivers/gpu/ion/ion_priv.h
+++ b/drivers/gpu/ion/ion_priv.h
@@ -85,11 +85,16 @@
 	char task_comm[TASK_COMM_LEN];
 	pid_t pid;
 };
+void ion_buffer_destroy(struct ion_buffer *buffer);
 
 /**
  * struct ion_heap_ops - ops to operate on a given heap
  * @allocate:		allocate memory
- * @free:		free memory
+ * @free:		free memory. Will be called with
+ *			ION_FLAG_FREED_FROM_SHRINKER set in buffer flags when
+ *			called from a shrinker. In that case, the pages being
+ *			free'd must be truly free'd back to the system, not put
+ *			in a page pool or otherwise cached.
  * @phys		get physical address of a buffer (only define on
  *			physically contiguous heaps)
  * @map_dma		map the memory for dma to a scatterlist
@@ -115,7 +120,7 @@
 			 struct vm_area_struct *vma);
 	void (*unmap_user) (struct ion_heap *mapper, struct ion_buffer *buffer);
 	int (*print_debug)(struct ion_heap *heap, struct seq_file *s,
-			   const struct rb_root *mem_map);
+			   const struct list_head *mem_map);
 	int (*secure_heap)(struct ion_heap *heap, int version, void *data);
 	int (*unsecure_heap)(struct ion_heap *heap, int version, void *data);
 	int (*secure_buffer)(struct ion_buffer *buffer, int version,
@@ -139,8 +144,13 @@
  *			allocating.  These are specified by platform data and
  *			MUST be unique
  * @name:		used for debugging
+ * @shrinker:		a shrinker for the heap, if the heap caches system
+ *			memory, it must define a shrinker to return it on low
+ *			memory conditions, this includes system memory cached
+ *			in the deferred free lists for heaps that support it
  * @priv:		private heap data
  * @free_list:		free list head if deferred free is used
+ * @free_list_size	size of the deferred free list in bytes
  * @lock:		protects the free list
  * @waitqueue:		queue to wait on from deferred free thread
  * @task:		task struct of deferred free thread
@@ -160,8 +170,10 @@
 	unsigned long flags;
 	unsigned int id;
 	const char *name;
+	struct shrinker shrinker;
 	void *priv;
 	struct list_head free_list;
+	size_t free_list_size;
 	struct rt_mutex lock;
 	wait_queue_head_t waitqueue;
 	struct task_struct *task;
@@ -209,6 +221,11 @@
  */
 void ion_device_add_heap(struct ion_device *dev, struct ion_heap *heap);
 
+struct pages_mem {
+	struct page **pages;
+	void (*free_fn) (const void *);
+};
+
 /**
  * some helpers for common operations on buffers using the sg_table
  * and vaddr fields
@@ -217,7 +234,71 @@
 void ion_heap_unmap_kernel(struct ion_heap *, struct ion_buffer *);
 int ion_heap_map_user(struct ion_heap *, struct ion_buffer *,
 			struct vm_area_struct *);
+int ion_heap_pages_zero(struct page **pages, int num_pages,
+			bool should_invalidate);
 int ion_heap_buffer_zero(struct ion_buffer *buffer);
+int ion_heap_high_order_page_zero(struct page *page,
+				int order, bool should_invalidate);
+
+/**
+ * ion_heap_init_deferred_free -- initialize deferred free functionality
+ * @heap:		the heap
+ *
+ * If a heap sets the ION_HEAP_FLAG_DEFER_FREE flag this function will
+ * be called to setup deferred frees. Calls to free the buffer will
+ * return immediately and the actual free will occur some time later
+ */
+int ion_heap_init_deferred_free(struct ion_heap *heap);
+
+/**
+ * ion_heap_freelist_add - add a buffer to the deferred free list
+ * @heap:		the heap
+ * @buffer: 		the buffer
+ *
+ * Adds an item to the deferred freelist.
+ */
+void ion_heap_freelist_add(struct ion_heap *heap, struct ion_buffer *buffer);
+
+/**
+ * ion_heap_freelist_drain - drain the deferred free list
+ * @heap:		the heap
+ * @size:		amount of memory to drain in bytes
+ *
+ * Drains the indicated amount of memory from the deferred freelist immediately.
+ * Returns the total amount freed.  The total freed may be higher depending
+ * on the size of the items in the list, or lower if there is insufficient
+ * total memory on the freelist.
+ */
+size_t ion_heap_freelist_drain(struct ion_heap *heap, size_t size);
+
+/**
+ * ion_heap_freelist_drain_from_shrinker - drain the deferred free
+ *				list, skipping any heap-specific
+ *				pooling or caching mechanisms
+ *
+ * @heap:		the heap
+ * @size:		amount of memory to drain in bytes
+ *
+ * Drains the indicated amount of memory from the deferred freelist immediately.
+ * Returns the total amount freed.  The total freed may be higher depending
+ * on the size of the items in the list, or lower if there is insufficient
+ * total memory on the freelist.
+ *
+ * Unlike with @ion_heap_freelist_drain, don't put any pages back into
+ * page pools or otherwise cache the pages. Everything must be
+ * genuinely free'd back to the system. If you're free'ing from a
+ * shrinker you probably want to use this. Note that this relies on
+ * the heap.ops.free callback honoring the
+ * ION_FLAG_FREED_FROM_SHRINKER flag.
+ */
+size_t ion_heap_freelist_drain_from_shrinker(struct ion_heap *heap,
+					size_t size);
+
+/**
+ * ion_heap_freelist_size - returns the size of the freelist in bytes
+ * @heap:		the heap
+ */
+size_t ion_heap_freelist_size(struct ion_heap *heap);
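A minimal sketch of how a heap is expected to tie these helpers together. The example_* names are hypothetical, and the core-side plumbing (ion.c routing freed buffers onto the freelist when ION_HEAP_FLAG_DEFER_FREE is set, and ion_device_add_heap() calling ion_heap_init_deferred_free()) is assumed rather than shown here:

	/* Hypothetical .free callback honoring the shrinker contract. */
	static void example_heap_free(struct ion_buffer *buffer)
	{
		if (buffer->flags & ION_FLAG_FREED_FROM_SHRINKER)
			example_release_to_system(buffer);	/* no pool/cache allowed here */
		else
			example_recycle_into_pool(buffer);
	}

	/* Hypothetical shrinker built on the freelist helpers (old shrink API). */
	static int example_heap_shrink(struct shrinker *s, struct shrink_control *sc)
	{
		struct ion_heap *heap = container_of(s, struct ion_heap, shrinker);

		if (sc->nr_to_scan)
			ion_heap_freelist_drain_from_shrinker(heap,
					sc->nr_to_scan * PAGE_SIZE);

		/* old shrinker protocol: report what remains reclaimable */
		return ion_heap_freelist_size(heap) / PAGE_SIZE;
	}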
 
 
 /**
@@ -276,6 +357,8 @@
  * @gfp_mask:		gfp_mask to use from alloc
  * @order:		order of pages in the pool
  * @list:		plist node for list of pools
+ * @should_invalidate:	whether or not the cache needs to be invalidated at
+ *			page allocation time.
  *
  * Allows you to keep a pool of pre allocated pages to use from your heap.
  * Keeping a pool of pages that is ready for dma, ie any cached mapping have
@@ -288,16 +371,26 @@
 	struct list_head high_items;
 	struct list_head low_items;
 	struct mutex mutex;
-	void *(*alloc)(struct ion_page_pool *pool);
-	void (*free)(struct ion_page_pool *pool, struct page *page);
 	gfp_t gfp_mask;
 	unsigned int order;
 	struct plist_node list;
+	bool should_invalidate;
 };
 
-struct ion_page_pool *ion_page_pool_create(gfp_t gfp_mask, unsigned int order);
+struct ion_page_pool *ion_page_pool_create(gfp_t gfp_mask, unsigned int order,
+	bool should_invalidate);
 void ion_page_pool_destroy(struct ion_page_pool *);
 void *ion_page_pool_alloc(struct ion_page_pool *);
 void ion_page_pool_free(struct ion_page_pool *, struct page *);
 
+/** ion_page_pool_shrink - shrinks the size of the memory cached in the pool
+ * @pool:		the pool
+ * @gfp_mask:		the memory type to reclaim
+ * @nr_to_scan:		number of items to shrink in pages
+ *
+ * returns the number of items freed in pages
+ */
+int ion_page_pool_shrink(struct ion_page_pool *pool, gfp_t gfp_mask,
+			  int nr_to_scan);
+
 #endif /* _ION_PRIV_H */
diff --git a/drivers/gpu/ion/ion_removed_heap.c b/drivers/gpu/ion/ion_removed_heap.c
index 84d8d37..94d4a25 100644
--- a/drivers/gpu/ion/ion_removed_heap.c
+++ b/drivers/gpu/ion/ion_removed_heap.c
@@ -233,7 +233,7 @@
 }
 
 static int ion_removed_print_debug(struct ion_heap *heap, struct seq_file *s,
-				    const struct rb_root *mem_map)
+				    const struct list_head *mem_map)
 {
 	struct ion_removed_heap *removed_heap =
 		container_of(heap, struct ion_removed_heap, heap);
@@ -247,16 +247,14 @@
 		unsigned long size = removed_heap->total_size;
 		unsigned long end = base+size;
 		unsigned long last_end = base;
-		struct rb_node *n;
+		struct mem_map_data *data;
 
 		seq_printf(s, "\nMemory Map\n");
 		seq_printf(s, "%16.s %14.s %14.s %14.s\n",
 			   "client", "start address", "end address",
 			   "size (hex)");
 
-		for (n = rb_first(mem_map); n; n = rb_next(n)) {
-			struct mem_map_data *data =
-					rb_entry(n, struct mem_map_data, node);
+		list_for_each_entry(data, mem_map, node) {
 			const char *client_name = "(null)";
 
 			if (last_end < data->addr) {
diff --git a/drivers/gpu/ion/ion_system_heap.c b/drivers/gpu/ion/ion_system_heap.c
index e1b3383..8e885b2 100644
--- a/drivers/gpu/ion/ion_system_heap.c
+++ b/drivers/gpu/ion/ion_system_heap.c
@@ -53,7 +53,8 @@
 
 struct ion_system_heap {
 	struct ion_heap heap;
-	struct ion_page_pool **pools;
+	struct ion_page_pool **uncached_pools;
+	struct ion_page_pool **cached_pools;
 };
 
 struct page_info {
@@ -68,29 +69,14 @@
 {
 	bool cached = ion_buffer_cached(buffer);
 	bool split_pages = ion_buffer_fault_user_mappings(buffer);
-	struct ion_page_pool *pool = heap->pools[order_to_index(order)];
 	struct page *page;
+	struct ion_page_pool *pool;
 
-	if (!cached) {
-		page = ion_page_pool_alloc(pool);
-	} else {
-		struct scatterlist sg;
-		gfp_t gfp_flags = low_order_gfp_flags;
-
-		if (order > 4)
-			gfp_flags = high_order_gfp_flags;
-		trace_alloc_pages_sys_start(gfp_flags, order);
-		page = alloc_pages(gfp_flags, order);
-		trace_alloc_pages_sys_end(gfp_flags, order);
-		if (!page) {
-			trace_alloc_pages_sys_fail(gfp_flags, order);
-			return 0;
-		}
-		sg_init_table(&sg, 1);
-		sg_set_page(&sg, page, PAGE_SIZE << order, 0);
-		sg_dma_address(&sg) = sg_phys(&sg);
-		dma_sync_sg_for_device(NULL, &sg, 1, DMA_BIDIRECTIONAL);
-	}
+	if (!cached)
+		pool = heap->uncached_pools[order_to_index(order)];
+	else
+		pool = heap->cached_pools[order_to_index(order)];
+	page = ion_page_pool_alloc(pool);
 	if (!page)
 		return 0;
 
@@ -107,14 +93,20 @@
 	bool split_pages = ion_buffer_fault_user_mappings(buffer);
 	int i;
 
-	if (!cached) {
-		struct ion_page_pool *pool = heap->pools[order_to_index(order)];
+	if ((buffer->flags & ION_FLAG_FREED_FROM_SHRINKER)) {
+		if (split_pages) {
+			for (i = 0; i < (1 << order); i++)
+				__free_page(page + i);
+		} else {
+			__free_pages(page, order);
+		}
+	} else  {
+		struct ion_page_pool *pool;
+		if (cached)
+			pool = heap->cached_pools[order_to_index(order)];
+		else
+			pool = heap->uncached_pools[order_to_index(order)];
 		ion_page_pool_free(pool, page);
-	} else if (split_pages) {
-		for (i = 0; i < (1 << order); i++)
-			__free_page(page + i);
-	} else {
-		__free_pages(page, order);
 	}
 }
 
@@ -226,14 +218,11 @@
 							struct ion_system_heap,
 							heap);
 	struct sg_table *table = buffer->sg_table;
-	bool cached = ion_buffer_cached(buffer);
 	struct scatterlist *sg;
 	LIST_HEAD(pages);
 	int i;
 
-	/* uncached pages come from the page pools, zero them before returning
-	   for security purposes (other allocations are zerod at alloc time */
-	if (!cached)
+	if (!(buffer->flags & ION_FLAG_FREED_FROM_SHRINKER))
 		ion_heap_buffer_zero(buffer);
 
 	for_each_sg(table->sgl, sg, table->nents, i)
@@ -265,6 +254,56 @@
 	.map_user = ion_heap_map_user,
 };
 
+static int ion_system_heap_shrink(struct shrinker *shrinker,
+				  struct shrink_control *sc)
+{
+	struct ion_heap *heap = container_of(shrinker, struct ion_heap,
+					     shrinker);
+	struct ion_system_heap *sys_heap = container_of(heap,
+							struct ion_system_heap,
+							heap);
+	int nr_total = 0;
+	int nr_freed = 0;
+	int i;
+
+	if (sc->nr_to_scan == 0)
+		goto end;
+
+	/* shrink the free list first, no point in zeroing the memory if
+	   we're just going to reclaim it. Also, skip any possible
+	   page pooling */
+	nr_freed += ion_heap_freelist_drain_from_shrinker(
+		heap, sc->nr_to_scan * PAGE_SIZE) / PAGE_SIZE;
+
+	if (nr_freed >= sc->nr_to_scan)
+		goto end;
+
+	for (i = 0; i < num_orders; i++) {
+		nr_freed += ion_page_pool_shrink(sys_heap->uncached_pools[i],
+						sc->gfp_mask, sc->nr_to_scan);
+		if (nr_freed >= sc->nr_to_scan)
+			goto end;
+
+		nr_freed += ion_page_pool_shrink(sys_heap->cached_pools[i],
+						sc->gfp_mask, sc->nr_to_scan);
+		if (nr_freed >= sc->nr_to_scan)
+			goto end;
+	}
+
+end:
+	/* total number of items is whatever the page pools are holding
+	   plus whatever's in the freelist */
+	for (i = 0; i < num_orders; i++) {
+		nr_total += ion_page_pool_shrink(
+			sys_heap->uncached_pools[i], sc->gfp_mask, 0);
+		nr_total += ion_page_pool_shrink(
+			sys_heap->cached_pools[i], sc->gfp_mask, 0);
+	}
+	nr_total += ion_heap_freelist_size(heap) / PAGE_SIZE;
+	return nr_total;
+
+}
+
 static int ion_system_heap_debug_show(struct ion_heap *heap, struct seq_file *s,
 				      void *unused)
 {
@@ -274,21 +313,74 @@
 							heap);
 	int i;
 	for (i = 0; i < num_orders; i++) {
-		struct ion_page_pool *pool = sys_heap->pools[i];
-		seq_printf(s, "%d order %u highmem pages in pool = %lu total\n",
-			   pool->high_count, pool->order,
-			   (1 << pool->order) * PAGE_SIZE * pool->high_count);
-		seq_printf(s, "%d order %u lowmem pages in pool = %lu total\n",
-			   pool->low_count, pool->order,
-			   (1 << pool->order) * PAGE_SIZE * pool->low_count);
+		struct ion_page_pool *pool = sys_heap->uncached_pools[i];
+		seq_printf(s,
+			"%d order %u highmem pages in uncached pool = %lu total\n",
+			pool->high_count, pool->order,
+			(1 << pool->order) * PAGE_SIZE * pool->high_count);
+		seq_printf(s,
+			"%d order %u lowmem pages in uncached pool = %lu total\n",
+			pool->low_count, pool->order,
+			(1 << pool->order) * PAGE_SIZE * pool->low_count);
+	}
+
+	for (i = 0; i < num_orders; i++) {
+		struct ion_page_pool *pool = sys_heap->cached_pools[i];
+		seq_printf(s,
+			"%d order %u highmem pages in cached pool = %lu total\n",
+			pool->high_count, pool->order,
+			(1 << pool->order) * PAGE_SIZE * pool->high_count);
+		seq_printf(s,
+			"%d order %u lowmem pages in cached pool = %lu total\n",
+			pool->low_count, pool->order,
+			(1 << pool->order) * PAGE_SIZE * pool->low_count);
+	}
+
+	return 0;
+}
+
+
+static void ion_system_heap_destroy_pools(struct ion_page_pool **pools)
+{
+	int i;
+	for (i = 0; i < num_orders; i++)
+		if (pools[i])
+			ion_page_pool_destroy(pools[i]);
+}
+
+/**
+ * ion_system_heap_create_pools - Creates pools for all orders
+ *
+ * If this fails you don't need to destroy any pools. It's all or
+ * nothing. If it succeeds you'll eventually need to use
+ * ion_system_heap_destroy_pools to destroy the pools.
+ */
+static int ion_system_heap_create_pools(struct ion_page_pool **pools,
+					bool should_invalidate)
+{
+	int i;
+	for (i = 0; i < num_orders; i++) {
+		struct ion_page_pool *pool;
+		gfp_t gfp_flags = low_order_gfp_flags;
+
+		if (orders[i] > 4)
+			gfp_flags = high_order_gfp_flags;
+		pool = ion_page_pool_create(gfp_flags, orders[i],
+					should_invalidate);
+		if (!pool)
+			goto err_create_pool;
+		pools[i] = pool;
 	}
 	return 0;
+err_create_pool:
+	ion_system_heap_destroy_pools(pools);
+	return 1;
 }
 
 struct ion_heap *ion_system_heap_create(struct ion_platform_heap *unused)
 {
 	struct ion_system_heap *heap;
-	int i;
+	int pools_size = sizeof(struct ion_page_pool *) * num_orders;
 
 	heap = kzalloc(sizeof(struct ion_system_heap), GFP_KERNEL);
 	if (!heap)
@@ -296,29 +388,35 @@
 	heap->heap.ops = &system_heap_ops;
 	heap->heap.type = ION_HEAP_TYPE_SYSTEM;
 	heap->heap.flags = ION_HEAP_FLAG_DEFER_FREE;
-	heap->pools = kzalloc(sizeof(struct ion_page_pool *) * num_orders,
-			      GFP_KERNEL);
-	if (!heap->pools)
-		goto err_alloc_pools;
-	for (i = 0; i < num_orders; i++) {
-		struct ion_page_pool *pool;
-		gfp_t gfp_flags = low_order_gfp_flags;
 
-		if (orders[i] > 4)
-			gfp_flags = high_order_gfp_flags;
-		pool = ion_page_pool_create(gfp_flags, orders[i]);
-		if (!pool)
-			goto err_create_pool;
-		heap->pools[i] = pool;
-	}
+	heap->uncached_pools = kzalloc(pools_size, GFP_KERNEL);
+	if (!heap->uncached_pools)
+		goto err_alloc_uncached_pools;
+
+	heap->cached_pools = kzalloc(pools_size, GFP_KERNEL);
+	if (!heap->cached_pools)
+		goto err_alloc_cached_pools;
+
+	if (ion_system_heap_create_pools(heap->uncached_pools, false))
+		goto err_create_uncached_pools;
+
+	if (ion_system_heap_create_pools(heap->cached_pools, true))
+		goto err_create_cached_pools;
+
+	heap->heap.shrinker.shrink = ion_system_heap_shrink;
+	heap->heap.shrinker.seeks = DEFAULT_SEEKS;
+	heap->heap.shrinker.batch = 0;
+	register_shrinker(&heap->heap.shrinker);
 	heap->heap.debug_show = ion_system_heap_debug_show;
 	return &heap->heap;
-err_create_pool:
-	for (i = 0; i < num_orders; i++)
-		if (heap->pools[i])
-			ion_page_pool_destroy(heap->pools[i]);
-	kfree(heap->pools);
-err_alloc_pools:
+
+err_create_cached_pools:
+	ion_system_heap_destroy_pools(heap->uncached_pools);
+err_create_uncached_pools:
+	kfree(heap->cached_pools);
+err_alloc_cached_pools:
+	kfree(heap->uncached_pools);
+err_alloc_uncached_pools:
 	kfree(heap);
 	return ERR_PTR(-ENOMEM);
 }
@@ -328,36 +426,82 @@
 	struct ion_system_heap *sys_heap = container_of(heap,
 							struct ion_system_heap,
 							heap);
-	int i;
 
-	for (i = 0; i < num_orders; i++)
-		ion_page_pool_destroy(sys_heap->pools[i]);
-	kfree(sys_heap->pools);
+	ion_system_heap_destroy_pools(sys_heap->uncached_pools);
+	ion_system_heap_destroy_pools(sys_heap->cached_pools);
+	kfree(sys_heap->uncached_pools);
+	kfree(sys_heap->cached_pools);
 	kfree(sys_heap);
 }
 
+struct kmalloc_buffer_info {
+	struct sg_table *table;
+	void *vaddr;
+};
+
 static int ion_system_contig_heap_allocate(struct ion_heap *heap,
 					   struct ion_buffer *buffer,
 					   unsigned long len,
 					   unsigned long align,
 					   unsigned long flags)
 {
-	buffer->priv_virt = kzalloc(len, GFP_KERNEL);
-	if (!buffer->priv_virt)
-		return -ENOMEM;
+	int ret;
+	struct kmalloc_buffer_info *info;
+
+	info = kmalloc(sizeof(struct kmalloc_buffer_info), GFP_KERNEL);
+	if (!info) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	info->table = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
+	if (!info->table) {
+		ret = -ENOMEM;
+		goto kfree_info;
+	}
+
+	ret = sg_alloc_table(info->table, 1, GFP_KERNEL);
+	if (ret)
+		goto kfree_table;
+
+	info->vaddr = kzalloc(len, GFP_KERNEL);
+	if (!info->vaddr) {
+		ret = -ENOMEM;
+		goto sg_free_table;
+	}
+
+	sg_set_page(info->table->sgl, virt_to_page(info->vaddr), len,
+		    0);
+	sg_dma_address(info->table->sgl) = virt_to_phys(info->vaddr);
+	dma_sync_sg_for_device(NULL, info->table->sgl, 1, DMA_BIDIRECTIONAL);
+
+	buffer->priv_virt = info;
 	return 0;
+
+sg_free_table:
+	sg_free_table(info->table);
+kfree_table:
+	kfree(info->table);
+kfree_info:
+	kfree(info);
+out:
+	return ret;
 }
 
 void ion_system_contig_heap_free(struct ion_buffer *buffer)
 {
-	kfree(buffer->priv_virt);
+	struct kmalloc_buffer_info *info = buffer->priv_virt;
+	sg_free_table(info->table);
+	kfree(info->table);
+	kfree(info->vaddr);
+	kfree(info);
 }
 
 static int ion_system_contig_heap_phys(struct ion_heap *heap,
 				       struct ion_buffer *buffer,
 				       ion_phys_addr_t *addr, size_t *len)
 {
-	*addr = virt_to_phys(buffer->priv_virt);
+	struct kmalloc_buffer_info *info = buffer->priv_virt;
+	*addr = virt_to_phys(info->vaddr);
 	*len = buffer->size;
 	return 0;
 }
@@ -365,27 +509,13 @@
 struct sg_table *ion_system_contig_heap_map_dma(struct ion_heap *heap,
 						struct ion_buffer *buffer)
 {
-	struct sg_table *table;
-	int ret;
-
-	table = kzalloc(sizeof(struct sg_table), GFP_KERNEL);
-	if (!table)
-		return ERR_PTR(-ENOMEM);
-	ret = sg_alloc_table(table, 1, GFP_KERNEL);
-	if (ret) {
-		kfree(table);
-		return ERR_PTR(ret);
-	}
-	sg_set_page(table->sgl, virt_to_page(buffer->priv_virt), buffer->size,
-		    0);
-	return table;
+	struct kmalloc_buffer_info *info = buffer->priv_virt;
+	return info->table;
 }
 
 void ion_system_contig_heap_unmap_dma(struct ion_heap *heap,
 				      struct ion_buffer *buffer)
 {
-	sg_free_table(buffer->sg_table);
-	kfree(buffer->sg_table);
 }
 
 static struct ion_heap_ops kmalloc_ops = {
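
For context, a minimal sketch of how the split into uncached_pools and
cached_pools lets the allocation path pick the pool set matching a buffer's
caching attribute. This is illustrative only: pick_pool_for_buffer() is a
made-up name and the real allocation path is more involved.

static struct ion_page_pool *pick_pool_for_buffer(
		struct ion_system_heap *sys_heap,
		struct ion_buffer *buffer,
		unsigned int order_index)
{
	/* ION_FLAG_CACHED buffers come from the cached pools */
	if (buffer->flags & ION_FLAG_CACHED)
		return sys_heap->cached_pools[order_index];
	return sys_heap->uncached_pools[order_index];
}
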
diff --git a/drivers/gpu/ion/msm/msm_ion.c b/drivers/gpu/ion/msm/msm_ion.c
index 118c39a..c77bac7 100644
--- a/drivers/gpu/ion/msm/msm_ion.c
+++ b/drivers/gpu/ion/msm/msm_ion.c
@@ -51,74 +51,56 @@
 static struct ion_heap_desc ion_heap_meta[] = {
 	{
 		.id	= ION_SYSTEM_HEAP_ID,
-		.type	= ION_HEAP_TYPE_SYSTEM,
-		.name	= ION_VMALLOC_HEAP_NAME,
+		.name	= ION_SYSTEM_HEAP_NAME,
 	},
 	{
 		.id	= ION_SYSTEM_CONTIG_HEAP_ID,
-		.type	= ION_HEAP_TYPE_SYSTEM_CONTIG,
 		.name	= ION_KMALLOC_HEAP_NAME,
 	},
 	{
 		.id	= ION_CP_MM_HEAP_ID,
-		.type	= ION_HEAP_TYPE_SECURE_DMA,
 		.name	= ION_MM_HEAP_NAME,
 		.permission_type = IPT_TYPE_MM_CARVEOUT,
 	},
 	{
 		.id	= ION_MM_FIRMWARE_HEAP_ID,
-		.type	= ION_HEAP_TYPE_CARVEOUT,
 		.name	= ION_MM_FIRMWARE_HEAP_NAME,
 	},
 	{
 		.id	= ION_CP_MFC_HEAP_ID,
-		.type	= ION_HEAP_TYPE_CP,
 		.name	= ION_MFC_HEAP_NAME,
 		.permission_type = IPT_TYPE_MFC_SHAREDMEM,
 	},
 	{
 		.id	= ION_SF_HEAP_ID,
-		.type	= ION_HEAP_TYPE_CARVEOUT,
 		.name	= ION_SF_HEAP_NAME,
 	},
 	{
-		.id	= ION_IOMMU_HEAP_ID,
-		.type	= ION_HEAP_TYPE_IOMMU,
-		.name	= ION_IOMMU_HEAP_NAME,
-	},
-	{
 		.id	= ION_QSECOM_HEAP_ID,
-		.type	= ION_HEAP_TYPE_DMA,
 		.name	= ION_QSECOM_HEAP_NAME,
 	},
 	{
 		.id	= ION_AUDIO_HEAP_ID,
-		.type	= ION_HEAP_TYPE_CARVEOUT,
 		.name	= ION_AUDIO_HEAP_NAME,
 	},
 	{
 		.id	= ION_PIL1_HEAP_ID,
-		.type	= ION_HEAP_TYPE_CARVEOUT,
 		.name	= ION_PIL1_HEAP_NAME,
 	},
 	{
 		.id	= ION_PIL2_HEAP_ID,
-		.type	= ION_HEAP_TYPE_CARVEOUT,
 		.name	= ION_PIL2_HEAP_NAME,
 	},
 	{
 		.id	= ION_CP_WB_HEAP_ID,
-		.type	= ION_HEAP_TYPE_CP,
 		.name	= ION_WB_HEAP_NAME,
 	},
 	{
 		.id	= ION_CAMERA_HEAP_ID,
-		.type	= ION_HEAP_TYPE_CARVEOUT,
 		.name	= ION_CAMERA_HEAP_NAME,
 	},
 	{
 		.id	= ION_ADSP_HEAP_ID,
-		.type	= ION_HEAP_TYPE_DMA,
 		.name	= ION_ADSP_HEAP_NAME,
 	}
 };
@@ -127,6 +109,16 @@
 struct ion_client *msm_ion_client_create(unsigned int heap_mask,
 					const char *name)
 {
+	/*
+	 * The assumption is that if there is a NULL device, the ion
+	 * driver has not yet probed.
+	 */
+	if (idev == NULL)
+		return ERR_PTR(-EPROBE_DEFER);
+
+	if (IS_ERR(idev))
+		return (struct ion_client *)idev;
+
 	return ion_client_create(idev, name);
 }
 EXPORT_SYMBOL(msm_ion_client_create);
@@ -593,15 +585,58 @@
 	return ret;
 }
 
-static int msm_ion_populate_heap(struct ion_platform_heap *heap)
+#define MAKE_HEAP_TYPE_MAPPING(h) { .name = #h, \
+			.heap_type = ION_HEAP_TYPE_##h, }
+
+static struct heap_types_info {
+	const char *name;
+	int heap_type;
+} heap_types_info[] = {
+	MAKE_HEAP_TYPE_MAPPING(SYSTEM),
+	MAKE_HEAP_TYPE_MAPPING(SYSTEM_CONTIG),
+	MAKE_HEAP_TYPE_MAPPING(CARVEOUT),
+	MAKE_HEAP_TYPE_MAPPING(CHUNK),
+	MAKE_HEAP_TYPE_MAPPING(DMA),
+	MAKE_HEAP_TYPE_MAPPING(CP),
+	MAKE_HEAP_TYPE_MAPPING(SECURE_DMA),
+	MAKE_HEAP_TYPE_MAPPING(REMOVED),
+};
+
+static int msm_ion_get_heap_type_from_dt_node(struct device_node *node,
+					int *heap_type)
+{
+	const char *name;
+	int i, ret = -EINVAL;
+	ret = of_property_read_string(node, "qcom,ion-heap-type", &name);
+	if (ret)
+		goto out;
+	for (i = 0; i < ARRAY_SIZE(heap_types_info); ++i) {
+		if (!strcmp(heap_types_info[i].name, name)) {
+			*heap_type = heap_types_info[i].heap_type;
+			ret = 0;
+			goto out;
+		}
+	}
+	WARN(1, "Unknown heap type: %s. You might need to update heap_types_info in %s",
+		name, __FILE__);
+out:
+	return ret;
+}
+
+static int msm_ion_populate_heap(struct device_node *node,
+				struct ion_platform_heap *heap)
 {
 	unsigned int i;
-	int ret = -EINVAL;
+	int ret = -EINVAL, heap_type = -1;
 	unsigned int len = ARRAY_SIZE(ion_heap_meta);
 	for (i = 0; i < len; ++i) {
 		if (ion_heap_meta[i].id == heap->id) {
 			heap->name = ion_heap_meta[i].name;
-			heap->type = ion_heap_meta[i].type;
+			ret = msm_ion_get_heap_type_from_dt_node(node,
+								&heap_type);
+			if (ret)
+				break;
+			heap->type = heap_type;
 			ret = msm_init_extra_data(heap, &ion_heap_meta[i]);
 			break;
 		}
@@ -793,7 +828,7 @@
 		}
 		pdata->heaps[idx].id = val;
 
-		ret = msm_ion_populate_heap(&pdata->heaps[idx]);
+		ret = msm_ion_populate_heap(node, &pdata->heaps[idx]);
 		if (ret)
 			goto free_heaps;
 
@@ -933,9 +968,6 @@
 	struct ion_heap *heap = NULL;
 
 	switch ((int)heap_data->type) {
-	case ION_HEAP_TYPE_IOMMU:
-		heap = ion_iommu_heap_create(heap_data);
-		break;
 	case ION_HEAP_TYPE_CP:
 		heap = ion_cp_heap_create(heap_data);
 		break;
@@ -975,9 +1007,6 @@
 		return;
 
 	switch ((int)heap->type) {
-	case ION_HEAP_TYPE_IOMMU:
-		ion_iommu_heap_destroy(heap);
-		break;
 	case ION_HEAP_TYPE_CP:
 		ion_cp_heap_destroy(heap);
 		break;
@@ -999,6 +1028,7 @@
 
 static int msm_ion_probe(struct platform_device *pdev)
 {
+	static struct ion_device *new_dev;
 	struct ion_platform_data *pdata;
 	unsigned int pdata_needs_to_be_freed;
 	int err = -1;
@@ -1024,9 +1054,14 @@
 		goto out;
 	}
 
-	idev = ion_device_create(msm_ion_custom_ioctl);
-	if (IS_ERR_OR_NULL(idev)) {
-		err = PTR_ERR(idev);
+	new_dev = ion_device_create(msm_ion_custom_ioctl);
+	if (IS_ERR_OR_NULL(new_dev)) {
+		/*
+		 * Set idev to the error value so that clients can
+		 * see that Ion failed to probe.
+		 */
+		idev = new_dev;
+		err = PTR_ERR(new_dev);
 		goto freeheaps;
 	}
 
@@ -1053,13 +1088,18 @@
 							  heap_data->name);
 		}
 
-		ion_device_add_heap(idev, heaps[i]);
+		ion_device_add_heap(new_dev, heaps[i]);
 	}
 	check_for_heap_overlap(pdata->heaps, num_heaps);
 	if (pdata_needs_to_be_freed)
 		free_pdata(pdata);
 
-	platform_set_drvdata(pdev, idev);
+	platform_set_drvdata(pdev, new_dev);
+	/*
+	 * Intentionally set this at the very end so that client probes
+	 * are deferred until Ion is fully set up.
+	 */
+	idev = new_dev;
 	return 0;
 
 freeheaps:
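
With idev assigned only after the Ion device is fully initialised,
msm_ion_client_create() can now return -EPROBE_DEFER, and callers are
expected to propagate it so their own probe is retried once Ion is up.
A minimal sketch of a client probe under that assumption (the "example"
driver and its names are hypothetical):

static struct ion_client *example_client;

static int example_probe(struct platform_device *pdev)
{
	example_client = msm_ion_client_create(-1, "example"); /* -1: all heaps */
	if (IS_ERR_OR_NULL(example_client))
		return PTR_ERR(example_client);	/* may be -EPROBE_DEFER */
	return 0;
}
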
diff --git a/drivers/gpu/ion/msm_ion_priv.h b/drivers/gpu/ion/msm_ion_priv.h
index 2de4e8a..412ead2 100644
--- a/drivers/gpu/ion/msm_ion_priv.h
+++ b/drivers/gpu/ion/msm_ion_priv.h
@@ -21,14 +21,14 @@
 #include <linux/kref.h>
 #include <linux/mm_types.h>
 #include <linux/mutex.h>
-#include <linux/rbtree.h>
+#include <linux/types.h>
 #include <linux/ion.h>
 #include <linux/iommu.h>
 #include <linux/seq_file.h>
 
 /**
  * struct mem_map_data - represents information about the memory map for a heap
- * @node:		rb node used to store in the tree of mem_map_data
+ * @node:		list node used to store in the list of mem_map_data
  * @addr:		start address of memory region.
  * @addr:		end address of memory region.
  * @size:		size of memory region
@@ -36,7 +36,7 @@
  *
  */
 struct mem_map_data {
-	struct rb_node node;
+	struct list_head node;
 	ion_phys_addr_t addr;
 	ion_phys_addr_t addr_end;
 	unsigned long size;
diff --git a/drivers/media/platform/msm/camera_v2/pproc/cpp/msm_cpp.c b/drivers/media/platform/msm/camera_v2/pproc/cpp/msm_cpp.c
index 2124b13..63973b4 100644
--- a/drivers/media/platform/msm/camera_v2/pproc/cpp/msm_cpp.c
+++ b/drivers/media/platform/msm/camera_v2/pproc/cpp/msm_cpp.c
@@ -1285,8 +1285,8 @@
 
 	in_phyaddr = msm_cpp_fetch_buffer_info(cpp_dev,
 		&new_frame->input_buffer_info,
-		((new_frame->identity >> 16) & 0xFFFF),
-		(new_frame->identity & 0xFFFF), &in_fd);
+		((new_frame->input_buffer_info.identity >> 16) & 0xFFFF),
+		(new_frame->input_buffer_info.identity & 0xFFFF), &in_fd);
 	if (!in_phyaddr) {
 		pr_err("error gettting input physical address\n");
 		rc = -EINVAL;
diff --git a/include/linux/ion.h b/include/linux/ion.h
index 4983316..f36298b 100644
--- a/include/linux/ion.h
+++ b/include/linux/ion.h
@@ -58,6 +58,14 @@
 #define ION_FLAG_CACHED_NEEDS_SYNC 2	/* mappings of this buffer will created
 					   at mmap time, if this is set
 					   caches must be managed manually */
+#define ION_FLAG_FREED_FROM_SHRINKER 4	/* Skip any possible
+					   heap-specific caching
+					   mechanism (e.g. page
+					   pools). Guarantees that any
+					   buffer storage that came
+					   from the system allocator
+					   will be returned to the
+					   system allocator. */
 
 #ifdef __KERNEL__
 #include <linux/err.h>
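
The new ION_FLAG_FREED_FROM_SHRINKER flag tells a heap's free path to bypass
its page pools so that reclaimed memory really is handed back to the kernel.
A rough sketch of how a pooled heap might honour it (illustrative only; the
function name is made up):

static void example_free_buffer_page(struct ion_page_pool *pool,
				     struct ion_buffer *buffer,
				     struct page *page, unsigned int order)
{
	if (buffer->flags & ION_FLAG_FREED_FROM_SHRINKER)
		__free_pages(page, order);	/* straight back to the page allocator */
	else
		ion_page_pool_free(pool, page);	/* recycle through the pool */
}
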
diff --git a/include/linux/msm_ion.h b/include/linux/msm_ion.h
index 16a1000..3976699 100644
--- a/include/linux/msm_ion.h
+++ b/include/linux/msm_ion.h
@@ -5,11 +5,14 @@
 
 enum msm_ion_heap_types {
 	ION_HEAP_TYPE_MSM_START = ION_HEAP_TYPE_CUSTOM + 1,
-	ION_HEAP_TYPE_IOMMU = ION_HEAP_TYPE_MSM_START,
-	ION_HEAP_TYPE_DMA,
+	ION_HEAP_TYPE_DMA = ION_HEAP_TYPE_MSM_START,
 	ION_HEAP_TYPE_CP,
 	ION_HEAP_TYPE_SECURE_DMA,
 	ION_HEAP_TYPE_REMOVED,
+	/*
+	 * if you add a heap type here you should also add it to
+	 * heap_types_info[] in msm_ion.c
+	 */
 };
 
 /**
@@ -31,17 +34,23 @@
 	ION_ADSP_HEAP_ID = 22,
 	ION_PIL1_HEAP_ID = 23, /* Currently used for other PIL images */
 	ION_SF_HEAP_ID = 24,
-	ION_IOMMU_HEAP_ID = 25,
+	ION_SYSTEM_HEAP_ID = 25,
 	ION_PIL2_HEAP_ID = 26, /* Currently used for modem firmware images */
 	ION_QSECOM_HEAP_ID = 27,
 	ION_AUDIO_HEAP_ID = 28,
 
 	ION_MM_FIRMWARE_HEAP_ID = 29,
-	ION_SYSTEM_HEAP_ID = 30,
 
 	ION_HEAP_ID_RESERVED = 31 /** Bit reserved for ION_FLAG_SECURE flag */
 };
 
+/*
+ * The IOMMU heap is deprecated! Here are some aliases for backwards
+ * compatibility:
+ */
+#define ION_IOMMU_HEAP_ID ION_SYSTEM_HEAP_ID
+#define ION_HEAP_TYPE_IOMMU ION_HEAP_TYPE_SYSTEM
+
 enum ion_fixed_position {
 	NOT_FIXED,
 	FIXED_LOW,
@@ -90,7 +99,8 @@
 #define ION_HEAP(bit) (1 << (bit))
 
 #define ION_ADSP_HEAP_NAME	"adsp"
-#define ION_VMALLOC_HEAP_NAME	"vmalloc"
+#define ION_SYSTEM_HEAP_NAME	"system"
+#define ION_VMALLOC_HEAP_NAME	ION_SYSTEM_HEAP_NAME
 #define ION_KMALLOC_HEAP_NAME	"kmalloc"
 #define ION_AUDIO_HEAP_NAME	"audio"
 #define ION_SF_HEAP_NAME	"sf"
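
Because ION_IOMMU_HEAP_ID and ION_HEAP_TYPE_IOMMU are now aliases, existing
clients that still request the IOMMU heap transparently get the system heap.
For example, with ION_HEAP(bit) expanding to (1 << (bit)):

	unsigned int legacy_mask = ION_HEAP(ION_IOMMU_HEAP_ID);	/* 1 << 25 */
	unsigned int new_mask = ION_HEAP(ION_SYSTEM_HEAP_ID);		/* same bit */
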
diff --git a/include/media/msmb_pproc.h b/include/media/msmb_pproc.h
index de42c38..ed4ffa2 100644
--- a/include/media/msmb_pproc.h
+++ b/include/media/msmb_pproc.h
@@ -90,6 +90,7 @@
 	uint32_t offset;
 	uint8_t native_buff;
 	uint8_t processed_divert;
+	uint32_t identity;
 };
 
 struct msm_cpp_stream_buff_info_t {