Merge "drivers: thermal: Update the set_trips for hysteresis change" into msm-4.9
diff --git a/Documentation/devicetree/bindings/arm/msm/qcom,osm.txt b/Documentation/devicetree/bindings/arm/msm/qcom,osm.txt
index 964fea6..2347477 100644
--- a/Documentation/devicetree/bindings/arm/msm/qcom,osm.txt
+++ b/Documentation/devicetree/bindings/arm/msm/qcom,osm.txt
@@ -25,9 +25,8 @@
 	Value type: <stringlist>
 	Definition: Address names. Must be "osm_l3_base", "osm_pwrcl_base",
 		    "osm_perfcl_base", "l3_pll", "pwrcl_pll", "perfcl_pll",
-		    "l3_sequencer", "pwrcl_sequencer", "perfcl_sequencer" or
-		    "apps_itm_ctl". Optionally, "l3_efuse", "pwrcl_efuse"
-		    "perfcl_efuse".
+		    "l3_sequencer", "pwrcl_sequencer", "perfcl_sequencer".
+		    Optionally, "l3_efuse", "pwrcl_efuse", "perfcl_efuse".
 		    Must be specified in the same order as the corresponding
 		    addresses are specified in the reg property.
 
@@ -350,12 +349,11 @@
 			<0x178b0000 0x1000>,
 			<0x17d42400 0x0c00>,
 			<0x17d44400 0x0c00>,
-			<0x17d46c00 0x0c00>,
-			<0x17810090 0x8>;
+			<0x17d46c00 0x0c00>;
 		reg-names = "osm_l3_base", "osm_pwrcl_base", "osm_perfcl_base",
 			"l3_pll", "pwrcl_pll", "perfcl_pll",
 			"l3_sequencer", "pwrcl_sequencer",
-			"perfcl_sequencer", "apps_itm_ctl";
+			"perfcl_sequencer";
 
 		vdd-l3-supply = <&apc0_l3_vreg>;
 		vdd-pwrcl-supply = <&apc0_pwrcl_vreg>;
diff --git a/Documentation/devicetree/bindings/mmc/sdhci-msm.txt b/Documentation/devicetree/bindings/mmc/sdhci-msm.txt
index a773e41..fd7e140 100644
--- a/Documentation/devicetree/bindings/mmc/sdhci-msm.txt
+++ b/Documentation/devicetree/bindings/mmc/sdhci-msm.txt
@@ -4,9 +4,13 @@
 
 Required properties:
   - compatible : should be "qcom,sdhci-msm"
+		For SDCC version 5.0.0, MCI registers are removed from SDCC interface
+		and some registers are moved to HC. New compatible string is added to
+		support this change - "qcom,sdhci-msm-v5".
   - reg : should contain SDHC, SD Core register map.
   - reg-names : indicates various resources passed to driver (via reg proptery) by name.
 		Required "reg-names" are "hc_mem" and "core_mem"
+		optional ones are "tlmm_mem"
   - interrupts : should contain SDHC interrupts.
   - interrupt-names : indicates interrupts passed to driver (via interrupts property) by name.
 		      Required "interrupt-names" are "hc_irq" and "pwr_irq".
@@ -25,12 +29,45 @@
 			   1, 4 and 8.
 	- qcom,nonremovable - specifies whether the card in slot is
 			      hot pluggable or hard wired.
+	- qcom,nonhotplug - specifies the card in slot is not hot pluggable.
+			    if card lost or removed manually at runtime, don't retry
+			    to redetect it until next reboot probe.
 	- qcom,bus-speed-mode - specifies supported bus speed modes by host.
 				The supported bus speed modes are :
 				"HS200_1p8v" - indicates that host can support HS200 at 1.8v.
 				"HS200_1p2v" - indicates that host can support HS200 at 1.2v.
 				"DDR_1p8v" - indicates that host can support DDR mode at 1.8v.
 				"DDR_1p2v" - indicates that host can support DDR mode at 1.2v.
+	- qcom,devfreq,freq-table - specifies supported frequencies for clock scaling.
+				    Clock scaling logic shall toggle between these frequencies based
+				    on card load. In case the defined frequencies are over or below
+				    the supported card frequencies, they will be overridden
+				    during card init. In case this entry is not supplied,
+				    the driver will construct one based on the card
+				    supported max and min frequencies.
+				    The frequencies must be ordered from lowest to highest.
+	- qcom,pm-qos-irq-type - the PM QoS request type to be used for IRQ voting.
+	  Can be either "affine_cores" or "affine_irq". If not specified, will default
+	  to "affine_cores". Use "affine_irq" setting in case an IRQ balancer is active,
+	  and IRQ affinity changes during runtime.
+	- qcom,pm-qos-irq-cpu - specifies the CPU for which IRQ voting shall be done.
+	  If "affine_cores" was specified for property 'qcom,pm-qos-irq-type'
+	  then this property must be defined, and is not relevant otherwise.
+	- qcom,pm-qos-irq-latency - a tuple defining two latency values with which
+	  PM QoS IRQ voting shall be done. The first value is the latecy to be used
+	  when load is high (performance mode) and the second is for low loads
+	  (power saving mode).
+	- qcom,pm-qos-cpu-groups - defines cpu groups mapping.
+	  Each cell represnets a group, which is a cpu bitmask defining which cpus belong
+	  to that group.
+	- qcom,pm-qos-<mode>-latency-us - where <mode> is either "cmdq" or "legacy".
+	  An array of latency value tuples, each tuple corresponding to a cpu group in the order
+	  defined in property 'qcom,pm-qos-cpu-groups'. The first value is the latecy to be used
+	  when load is high (performance mode) and the second is for low loads
+	  (power saving mode). These values will be used for cpu group voting for
+	  command-queueing mode or legacy respectively.
+	- qcom,core_3_0v_support: an optional property that is used to fake
+	  3.0V support for SDIO devices.
 
 In the following, <supply> can be vdd (flash core voltage) or vdd-io (I/O voltage).
 	- qcom,<supply>-always-on - specifies whether supply should be kept "on" always.
@@ -52,6 +89,12 @@
 		- pinctrl-names
 		- pinctrl-0, pinctrl-1,.. pinctrl-n
 
+	- qcom,large-address-bus - specifies whether the soc is capable of
+				 supporting larger than 32 bit address bus width.
+
+	- qcom,wakeup-on-idle: if configured, the mmcqd thread will call
+	  set_wake_up_idle(), thereby voting for it to be called on idle CPUs.
+
 Example:
 
 	aliases {
@@ -75,6 +118,8 @@
 		qcom,vdd-io-voltage-level = <1800000 2950000>;
 		qcom,vdd-io-current-level = <6 22000>;
 
+		qcom,devfreq,freq-table = <52000000 200000000>;
+
 		pinctrl-names = "active", "sleep";
 		pinctrl-0 = <&sdc1_clk_on &sdc1_cmd_on &sdc1_data_on>;
 		pinctrl-1 = <&sdc1_clk_off &sdc1_cmd_on &sdc1_data_on>;
@@ -82,6 +127,7 @@
 
                 qcom,bus-width = <4>;
 		qcom,nonremovable;
+		qcom,large-address-bus;
 		qcom,bus-speed-mode = "HS200_1p8v", "DDR_1p8v";
 
 		gpios = <&msmgpio 40 0>, /* CLK */
@@ -91,4 +137,16 @@
 			<&msmgpio 36 0>, /* DATA2 */
 			<&msmgpio 35 0>; /* DATA3 */
 		qcom,gpio-names = "CLK", "CMD", "DAT0", "DAT1", "DAT2", "DAT3";
+
+		qcom,pm-qos-irq-type = "affine_cores";
+		qcom,pm-qos-irq-cpu = <0>;
+		qcom,pm-qos-irq-latency = <500 100>;
+		qcom,pm-qos-cpu-groups = <0x03 0x0c>;
+		qcom,pm-qos-cmdq-latency-us = <50 100>, <50 100>;
+		qcom,pm-qos-legacy-latency-us = <50 100>, <50 100>;
+	};
+
+	sdhc_2: qcom,sdhc@f98a4900 {
+		qcom,pm-qos-irq-type = "affine_irq";
+		qcom,pm-qos-irq-latency = <120 200>;
 	};
diff --git a/Documentation/devicetree/bindings/power/supply/qcom/qpnp-fg-gen3.txt b/Documentation/devicetree/bindings/power/supply/qcom/qpnp-fg-gen3.txt
index addb0a6..12d32ec 100644
--- a/Documentation/devicetree/bindings/power/supply/qcom/qpnp-fg-gen3.txt
+++ b/Documentation/devicetree/bindings/power/supply/qcom/qpnp-fg-gen3.txt
@@ -96,6 +96,14 @@
 		    This value has to be specified in negative values for
 		    the charging current.
 
+- qcom,fg-chg-term-base-current
+	Usage:      optional
+	Value type: <u32>
+	Definition: Battery current (in mA) upper boundary at which the fuel
+		    gauge will issue an end of charge during discharging. If
+		    this property is not specified, then the default value used
+		    will be 75mA.
+
 - qcom,fg-delta-soc-thr
 	Usage:      optional
 	Value type: <u32>
diff --git a/Documentation/mmc/mmc-dev-attrs.txt b/Documentation/mmc/mmc-dev-attrs.txt
index 379dc99..1d832d4 100644
--- a/Documentation/mmc/mmc-dev-attrs.txt
+++ b/Documentation/mmc/mmc-dev-attrs.txt
@@ -8,12 +8,29 @@
 
 	force_ro		Enforce read-only access even if write protect switch is off.
 
+	num_wr_reqs_to_start_packing 	This attribute is used to determine
+	the trigger for activating the write packing, in case the write
+	packing control feature is enabled.
+
+	When the MMC manages to reach a point where num_wr_reqs_to_start_packing
+	write requests could be packed, it enables the write packing feature.
+	This allows us to start the write packing only when it is beneficial
+	and has minimum affect on the read latency.
+
+	The number of potential packed requests that will trigger the packing
+	can be configured via sysfs by writing the required value to:
+	/sys/block/<block_dev_name>/num_wr_reqs_to_start_packing.
+
+	The default value of num_wr_reqs_to_start_packing was determined by
+	running parallel lmdd write and lmdd read operations and calculating
+	the max number of packed writes requests.
+
 SD and MMC Device Attributes
 ============================
 
 All attributes are read-only.
 
-	cid			Card Identifaction Register
+	cid			Card Identification Register
 	csd			Card Specific Data Register
 	scr			SD Card Configuration Register (SD only)
 	date			Manufacturing Date (from CID Register)
@@ -84,3 +101,41 @@
 	clkgate_delay	Tune the clock gating delay with desired value in milliseconds.
 
 echo <desired delay> > /sys/class/mmc_host/mmcX/clkgate_delay
+
+SD/MMC/SDIO Clock Scaling Attributes
+====================================
+
+Read and write accesses are provided to following attributes.
+
+	polling_interval	Measured in milliseconds, this attribute
+				defines how often we need to check the card
+				usage and make decisions on frequency scaling.
+
+	up_threshold		This attribute defines what should be the
+				average card usage between the polling
+				interval for the mmc core to make a decision
+				on whether it should increase the frequency.
+				For example when it is set to '35' it means
+				that between the checking intervals the card
+				needs to be on average more than 35% in use to
+				scale up the frequency. The value should be
+				between 0 - 100 so that it can be compared
+				against load percentage.
+
+	down_threshold		Similar to up_threshold, but on lowering the
+				frequency. For example, when it is set to '2'
+				it means that between the checking intervals
+				the card needs to be on average less than 2%
+				in use to scale down the clocks to minimum
+				frequency. The value should be between 0 - 100
+				so that it can be compared against load
+				percentage.
+
+	enable			Enable clock scaling for hosts (and cards)
+				that support ultrahigh speed modes
+				(SDR104, DDR50, HS200).
+
+echo <desired value> > /sys/class/mmc_host/mmcX/clk_scaling/polling_interval
+echo <desired value> > /sys/class/mmc_host/mmcX/clk_scaling/up_threshold
+echo <desired value> > /sys/class/mmc_host/mmcX/clk_scaling/down_threshold
+echo <desired value> > /sys/class/mmc_host/mmcX/clk_scaling/enable
diff --git a/arch/arm64/boot/dts/qcom/sdm845-coresight.dtsi b/arch/arm64/boot/dts/qcom/sdm845-coresight.dtsi
index a3adcec..fa225ef1 100644
--- a/arch/arm64/boot/dts/qcom/sdm845-coresight.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm845-coresight.dtsi
@@ -1615,6 +1615,166 @@
 		};
 	};
 
+	etm0: etm@7040000 {
+		compatible = "arm,primecell";
+		arm,primecell-periphid = <0x000bb95d>;
+
+		reg = <0x7040000 0x1000>;
+		cpu = <&CPU0>;
+
+		coresight-name = "coresight-etm0";
+
+		clocks = <&clock_gcc RPMH_QDSS_CLK>,
+			 <&clock_gcc RPMH_QDSS_A_CLK>;
+		clock-names = "apb_pclk", "core_a_clk";
+
+		port {
+			etm0_out_funnel_apss: endpoint {
+				remote-endpoint = <&funnel_apss_in_etm0>;
+			};
+		};
+	};
+
+	etm1: etm@7140000 {
+		compatible = "arm,primecell";
+		arm,primecell-periphid = <0x000bb95d>;
+
+		reg = <0x7140000 0x1000>;
+		cpu = <&CPU1>;
+
+		coresight-name = "coresight-etm1";
+
+		clocks = <&clock_gcc RPMH_QDSS_CLK>,
+			 <&clock_gcc RPMH_QDSS_A_CLK>;
+		clock-names = "apb_pclk", "core_a_clk";
+
+		port {
+			etm1_out_funnel_apss: endpoint {
+				remote-endpoint = <&funnel_apss_in_etm1>;
+			};
+		};
+	};
+
+	etm2: etm@7240000 {
+		compatible = "arm,primecell";
+		arm,primecell-periphid = <0x000bb95d>;
+
+		reg = <0x7240000 0x1000>;
+		cpu = <&CPU2>;
+
+		coresight-name = "coresight-etm2";
+
+		clocks = <&clock_gcc RPMH_QDSS_CLK>,
+			 <&clock_gcc RPMH_QDSS_A_CLK>;
+		clock-names = "apb_pclk", "core_a_clk";
+
+		port {
+			etm2_out_funnel_apss: endpoint {
+				remote-endpoint = <&funnel_apss_in_etm2>;
+			};
+		};
+	};
+
+	etm3: etm@7340000 {
+		compatible = "arm,primecell";
+		arm,primecell-periphid = <0x000bb95d>;
+
+		reg = <0x7340000 0x1000>;
+		cpu = <&CPU3>;
+
+		coresight-name = "coresight-etm3";
+
+		clocks = <&clock_gcc RPMH_QDSS_CLK>,
+			 <&clock_gcc RPMH_QDSS_A_CLK>;
+		clock-names = "apb_pclk", "core_a_clk";
+
+		port {
+			etm3_out_funnel_apss: endpoint {
+				remote-endpoint = <&funnel_apss_in_etm3>;
+			};
+		};
+	};
+
+	etm4: etm@7440000 {
+		compatible = "arm,primecell";
+		arm,primecell-periphid = <0x000bb95d>;
+
+		reg = <0x7440000 0x1000>;
+		cpu = <&CPU4>;
+
+		coresight-name = "coresight-etm4";
+
+		clocks = <&clock_gcc RPMH_QDSS_CLK>,
+			 <&clock_gcc RPMH_QDSS_A_CLK>;
+		clock-names = "apb_pclk", "core_a_clk";
+
+		port {
+			etm4_out_funnel_apss: endpoint {
+				remote-endpoint = <&funnel_apss_in_etm4>;
+			};
+		};
+	};
+
+	etm5: etm@7540000 {
+		compatible = "arm,primecell";
+		arm,primecell-periphid = <0x000bb95d>;
+
+		reg = <0x7540000 0x1000>;
+		cpu = <&CPU5>;
+
+		coresight-name = "coresight-etm5";
+
+		clocks = <&clock_gcc RPMH_QDSS_CLK>,
+			 <&clock_gcc RPMH_QDSS_A_CLK>;
+		clock-names = "apb_pclk", "core_a_clk";
+
+		port {
+			etm5_out_funnel_apss: endpoint {
+				remote-endpoint = <&funnel_apss_in_etm5>;
+			};
+		};
+	};
+
+	etm6: etm@7640000 {
+		compatible = "arm,primecell";
+		arm,primecell-periphid = <0x000bb95d>;
+
+		reg = <0x7640000 0x1000>;
+		cpu = <&CPU6>;
+
+		coresight-name = "coresight-etm6";
+
+		clocks = <&clock_gcc RPMH_QDSS_CLK>,
+			 <&clock_gcc RPMH_QDSS_A_CLK>;
+		clock-names = "apb_pclk", "core_a_clk";
+
+		port {
+			etm6_out_funnel_apss: endpoint {
+				remote-endpoint = <&funnel_apss_in_etm6>;
+			};
+		};
+	};
+
+	etm7: etm@7740000 {
+		compatible = "arm,primecell";
+		arm,primecell-periphid = <0x000bb95d>;
+
+		reg = <0x7740000 0x1000>;
+		cpu = <&CPU7>;
+
+		coresight-name = "coresight-etm7";
+
+		clocks = <&clock_gcc RPMH_QDSS_CLK>,
+			 <&clock_gcc RPMH_QDSS_A_CLK>;
+		clock-names = "apb_pclk", "core_a_clk";
+
+		port {
+			etm7_out_funnel_apss: endpoint {
+				remote-endpoint = <&funnel_apss_in_etm7>;
+			};
+		};
+	};
+
 	funnel_apss: funnel@7800000 {
 		compatible = "arm,primecell";
 		arm,primecell-periphid = <0x0003b908>;
@@ -1639,6 +1799,77 @@
 					    <&funnel_apss_merg_in_funnel_apss>;
 				};
 			};
+			port@1 {
+				reg = <0>;
+				funnel_apss_in_etm0: endpoint {
+					slave-mode;
+					remote-endpoint =
+						<&etm0_out_funnel_apss>;
+				};
+			};
+
+			port@2 {
+				reg = <1>;
+				funnel_apss_in_etm1: endpoint {
+					slave-mode;
+					remote-endpoint =
+						<&etm1_out_funnel_apss>;
+				};
+			};
+
+			port@3 {
+				reg = <2>;
+				funnel_apss_in_etm2: endpoint {
+					slave-mode;
+					remote-endpoint =
+						<&etm2_out_funnel_apss>;
+				};
+			};
+
+			port@4 {
+				reg = <3>;
+				funnel_apss_in_etm3: endpoint {
+					slave-mode;
+					remote-endpoint =
+						<&etm3_out_funnel_apss>;
+				};
+			};
+
+			port@5 {
+				reg = <4>;
+				funnel_apss_in_etm4: endpoint {
+					slave-mode;
+					remote-endpoint =
+						<&etm4_out_funnel_apss>;
+				};
+			};
+
+			port@6 {
+				reg = <5>;
+				funnel_apss_in_etm5: endpoint {
+					slave-mode;
+					remote-endpoint =
+						<&etm5_out_funnel_apss>;
+				};
+			};
+
+			port@7 {
+				reg = <6>;
+				funnel_apss_in_etm6: endpoint {
+					slave-mode;
+					remote-endpoint =
+						<&etm6_out_funnel_apss>;
+				};
+			};
+
+			port@8 {
+				reg = <7>;
+				funnel_apss_in_etm7: endpoint {
+					slave-mode;
+					remote-endpoint =
+						<&etm7_out_funnel_apss>;
+				};
+			};
 		};
 	};
 };
diff --git a/arch/arm64/boot/dts/qcom/sdm845-usb.dtsi b/arch/arm64/boot/dts/qcom/sdm845-usb.dtsi
index 7c310cd..82b7921 100644
--- a/arch/arm64/boot/dts/qcom/sdm845-usb.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm845-usb.dtsi
@@ -54,13 +54,12 @@
 			reg = <0x0a600000 0xcd00>;
 			interrupt-parent = <&intc>;
 			interrupts = <0 133 0>;
-			usb-phy = <&qusb_phy0>, <&usb_nop_phy>;
+			usb-phy = <&qusb_phy0>, <&usb_qmp_dp_phy>;
 			tx-fifo-resize;
 			linux,sysdev_is_parent;
 			snps,disable-clk-gating;
 			snps,has-lpm-erratum;
 			snps,hird-threshold = /bits/ 8 <0x10>;
-			maximum-speed = "high-speed";
 		};
 
 		qcom,usbbam@a704000 {
@@ -151,7 +150,7 @@
 			<0x1048 0x07 0x00 /* COM_PLL_IVCO */
 			 0x1080 0x14 0x00 /* COM_SYSCLK_EN_SEL */
 			 0x1034 0x08 0x00 /* COM_BIAS_EN_CLKBUFLR_EN */
-			 0x1137 0x30 0x00 /* COM_CLK_SELECT */
+			 0x1138 0x30 0x00 /* COM_CLK_SELECT */
 			 0x103c 0x02 0x00 /* COM_SYS_CLK_CTRL */
 			 0x108c 0x08 0x00 /* COM_RESETSM_CNTRL2 */
 			 0x115c 0x16 0x00 /* COM_CMN_CONFIG */
@@ -171,7 +170,7 @@
 			 0x1148 0x0a 0x00 /* COM_CORECLK_DIV_MODE0 */
 			 0x10a0 0x00 0x00 /* COM_LOCK_CMP3_MODE0 */
 			 0x109c 0x34 0x00 /* COM_LOCK_CMP2_MODE0 */
-			 0x1018 0x15 0x00 /* COM_LOCK_CMP1_MODE0 */
+			 0x1098 0x15 0x00 /* COM_LOCK_CMP1_MODE0 */
 			 0x1090 0x04 0x00 /* COM_LOCK_CMP_EN */
 			 0x1154 0x00 0x00 /* COM_CORE_CLK_EN */
 			 0x1094 0x00 0x00 /* COM_LOCK_CMP_CFG */
@@ -203,8 +202,8 @@
 			 0x1260 0x10 0x00 /* TXA_HIGHZ_DRVR_EN */
 			 0x12a4 0x12 0x00 /* TXA_RCV_DETECT_LVL_2 */
 			 0x128c 0x16 0x00 /* TXA_LANE_MODE_1 */
-			 0x1648 0x09 0x00 /* TXB_RES_CODE_LANE_OFFSET_RX */
-			 0x1644 0x0d 0x00 /* TXB_RES_CODE_LANE_OFFSET_TX */
+			 0x1248 0x09 0x00 /* TXA_RES_CODE_LANE_OFFSET_RX */
+			 0x1244 0x0d 0x00 /* TXA_RES_CODE_LANE_OFFSET_TX */
 			 0x1660 0x10 0x00 /* TXB_HIGHZ_DRVR_EN */
 			 0x16a4 0x12 0x00 /* TXB_RCV_DETECT_LVL_2 */
 			 0x168c 0x16 0x00 /* TXB_LANE_MODE_1 */
@@ -277,9 +276,9 @@
 		clock-names = "aux_clk", "pipe_clk", "ref_clk_src",
 				"ref_clk", "com_aux_clk";
 
-		resets = <&clock_gcc GCC_USB3_DP_PHY_PRIM_BCR>;
-		reset-names = "phy_reset";
-		status = "disabled";
+		resets = <&clock_gcc GCC_USB3_DP_PHY_PRIM_BCR>,
+			<&clock_gcc GCC_USB3_PHY_PRIM_BCR>;
+		reset-names = "global_phy_reset", "phy_reset";
 	};
 
 	dbm_1p5: dbm@a6f8000 {
diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi
index a89a57c..40952d3 100644
--- a/arch/arm64/boot/dts/qcom/sdm845.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi
@@ -868,12 +868,11 @@
 			<0x178b0000 0x1000>,
 			<0x17d42400 0x0c00>,
 			<0x17d44400 0x0c00>,
-			<0x17d46c00 0x0c00>,
-			<0x17810090 0x8>;
+			<0x17d46c00 0x0c00>;
 		reg-names = "osm_l3_base", "osm_pwrcl_base", "osm_perfcl_base",
 			"l3_pll", "pwrcl_pll", "perfcl_pll",
 			"l3_sequencer", "pwrcl_sequencer",
-			"perfcl_sequencer", "apps_itm_ctl";
+			"perfcl_sequencer";
 
 		vdd-l3-supply = <&apc0_l3_vreg>;
 		vdd-pwrcl-supply = <&apc0_pwrcl_vreg>;
@@ -967,7 +966,6 @@
 		qcom,safe-fsm-en;
 		qcom,ps-fsm-en;
 		qcom,droop-fsm-en;
-		qcom,osm-pll-setup;
 
 		clock-names = "xo_ao";
 		clocks = <&clock_rpmh RPMH_CXO_CLK_A>;
@@ -1005,7 +1003,7 @@
 		clock-names = "ref_clk_src",
 			"ref_clk",
 			"ref_aux_clk";
-		clocks = <&clock_rpmh RPMH_LN_BB_CLK1>,
+		clocks = <&clock_rpmh RPMH_CXO_CLK>,
 			<&clock_gcc GCC_UFS_MEM_CLKREF_CLK>,
 			<&clock_gcc GCC_UFS_PHY_PHY_AUX_CLK>;
 
@@ -1039,7 +1037,7 @@
 			<&clock_gcc GCC_UFS_PHY_AHB_CLK>,
 			<&clock_gcc GCC_UFS_PHY_UNIPRO_CORE_CLK>,
 			<&clock_gcc GCC_UFS_PHY_ICE_CORE_CLK>,
-			<&clock_rpmh RPMH_LN_BB_CLK1>,
+			<&clock_rpmh RPMH_CXO_CLK>,
 			<&clock_gcc GCC_UFS_PHY_TX_SYMBOL_0_CLK>,
 			<&clock_gcc GCC_UFS_PHY_RX_SYMBOL_0_CLK>,
 			<&clock_gcc GCC_UFS_PHY_RX_SYMBOL_1_CLK>;
@@ -1125,7 +1123,7 @@
 		clock-names = "ref_clk_src",
 			"ref_clk",
 			"ref_aux_clk";
-		clocks = <&clock_rpmh RPMH_LN_BB_CLK1>,
+		clocks = <&clock_rpmh RPMH_CXO_CLK>,
 			<&clock_gcc GCC_UFS_CARD_CLKREF_CLK>,
 			<&clock_gcc GCC_UFS_CARD_PHY_AUX_CLK>;
 
@@ -1158,7 +1156,7 @@
 			<&clock_gcc GCC_UFS_CARD_AHB_CLK>,
 			<&clock_gcc GCC_UFS_CARD_UNIPRO_CORE_CLK>,
 			<&clock_gcc GCC_UFS_CARD_ICE_CORE_CLK>,
-			<&clock_rpmh RPMH_LN_BB_CLK1>,
+			<&clock_rpmh RPMH_CXO_CLK>,
 			<&clock_gcc GCC_UFS_CARD_TX_SYMBOL_0_CLK>,
 			<&clock_gcc GCC_UFS_CARD_RX_SYMBOL_0_CLK>;
 		freq-table-hz =
@@ -1708,7 +1706,7 @@
 		reg-names = "msgram", "irq-reg-base";
 		qcom,irq-mask = <0x1>;
 		interrupts = <0 389 1>;
-		mbox_desc_offset = <0x0>;
+		mbox-desc-offset = <0x0>;
 		#mbox-cells = <1>;
 	};
 
diff --git a/arch/arm64/configs/sdm845-perf_defconfig b/arch/arm64/configs/sdm845-perf_defconfig
index e39097c..5cc46e5 100644
--- a/arch/arm64/configs/sdm845-perf_defconfig
+++ b/arch/arm64/configs/sdm845-perf_defconfig
@@ -380,6 +380,7 @@
 CONFIG_USB_CONFIGFS_F_QDSS=y
 CONFIG_MMC=y
 CONFIG_MMC_BLOCK_MINORS=32
+CONFIG_MMC_BLOCK_DEFERRED_RESUME=y
 CONFIG_MMC_TEST=y
 CONFIG_MMC_SDHCI=y
 CONFIG_MMC_SDHCI_PLTFM=y
diff --git a/arch/arm64/configs/sdm845_defconfig b/arch/arm64/configs/sdm845_defconfig
index bd24068..498e21e 100644
--- a/arch/arm64/configs/sdm845_defconfig
+++ b/arch/arm64/configs/sdm845_defconfig
@@ -327,7 +327,6 @@
 CONFIG_VIDEO_ADV_DEBUG=y
 CONFIG_VIDEO_FIXED_MINOR_RANGES=y
 CONFIG_V4L_PLATFORM_DRIVERS=y
-CONFIG_SPECTRA_CAMERA=y
 CONFIG_MSM_VIDC_V4L2=y
 CONFIG_MSM_VIDC_VMEM=y
 CONFIG_MSM_VIDC_GOVERNORS=y
@@ -390,6 +389,7 @@
 CONFIG_USB_CONFIGFS_F_QDSS=y
 CONFIG_MMC=y
 CONFIG_MMC_BLOCK_MINORS=32
+CONFIG_MMC_BLOCK_DEFERRED_RESUME=y
 CONFIG_MMC_TEST=y
 CONFIG_MMC_SDHCI=y
 CONFIG_MMC_SDHCI_PLTFM=y
diff --git a/drivers/char/diag/diagfwd.c b/drivers/char/diag/diagfwd.c
index 3fce72f7..f9dc670d 100644
--- a/drivers/char/diag/diagfwd.c
+++ b/drivers/char/diag/diagfwd.c
@@ -1345,6 +1345,7 @@
 
 static void hdlc_reset_timer_start(struct diag_md_session_t *info)
 {
+	mutex_lock(&driver->md_session_lock);
 	if (!hdlc_timer_in_progress) {
 		hdlc_timer_in_progress = 1;
 		if (info)
@@ -1354,6 +1355,7 @@
 			mod_timer(&driver->hdlc_reset_timer,
 			  jiffies + msecs_to_jiffies(200));
 	}
+	mutex_unlock(&driver->md_session_lock);
 }
 
 static void hdlc_reset_timer_func(unsigned long data)
diff --git a/drivers/clk/qcom/clk-cpu-osm.c b/drivers/clk/qcom/clk-cpu-osm.c
index a358dd0..8bbf55c 100644
--- a/drivers/clk/qcom/clk-cpu-osm.c
+++ b/drivers/clk/qcom/clk-cpu-osm.c
@@ -2266,8 +2266,6 @@
 	struct clk_osm *c;
 	struct device *dev = &pdev->dev;
 	struct clk_onecell_data *clk_data;
-	struct resource *res;
-	void *vbase;
 	char l3speedbinstr[] = "qcom,l3-speedbin0-v0";
 	char perfclspeedbinstr[] = "qcom,perfcl-speedbin0-v0";
 	char pwrclspeedbinstr[] = "qcom,pwrcl-speedbin0-v0";
@@ -2475,30 +2473,6 @@
 				(0x39 | (perfcl_clk.apm_threshold_vc << 6)));
 	}
 
-	res = platform_get_resource_byname(pdev, IORESOURCE_MEM,
-							"apps_itm_ctl");
-	if (!res) {
-		dev_err(&pdev->dev,
-			"Unable to get platform resource for apps_itm_ctl\n");
-		return -ENOMEM;
-	}
-
-	vbase = devm_ioremap(&pdev->dev, res->start,
-						resource_size(res));
-	if (!vbase) {
-		dev_err(&pdev->dev,
-				"Unable to map in apps_itm_ctl base\n");
-		return -ENOMEM;
-	}
-
-	val = readl_relaxed(vbase + 0x0);
-	val &= ~BIT(0);
-	writel_relaxed(val, vbase + 0x0);
-
-	val = readl_relaxed(vbase + 0x4);
-	val &= ~BIT(0);
-	writel_relaxed(val, vbase + 0x4);
-
 	/*
 	 * Perform typical secure-world HW initialization
 	 * as necessary.
diff --git a/drivers/clk/qcom/clk-rcg2.c b/drivers/clk/qcom/clk-rcg2.c
index 8484b57..2f9cfdf 100644
--- a/drivers/clk/qcom/clk-rcg2.c
+++ b/drivers/clk/qcom/clk-rcg2.c
@@ -418,7 +418,7 @@
 static int __clk_rcg2_set_rate(struct clk_hw *hw, unsigned long rate)
 {
 	struct clk_rcg2 *rcg = to_clk_rcg2(hw);
-	const struct freq_tbl *f;
+	const struct freq_tbl *f, *f_curr;
 	int ret, curr_src_index, new_src_index;
 	struct clk_hw *curr_src = NULL, *new_src = NULL;
 
@@ -436,15 +436,17 @@
 	}
 
 	if (rcg->flags & FORCE_ENABLE_RCG) {
-		if (!rcg->current_freq)
-			rcg->current_freq = cxo_f.freq;
-
+		rcg->current_freq = clk_get_rate(hw->clk);
 		if (rcg->current_freq == cxo_f.freq)
 			curr_src_index = 0;
 		else {
-			f = qcom_find_freq(rcg->freq_tbl, rcg->current_freq);
+			f_curr = qcom_find_freq(rcg->freq_tbl,
+							rcg->current_freq);
+			if (!f_curr)
+				return -EINVAL;
+
 			curr_src_index = qcom_find_src_index(hw,
-						rcg->parent_map, f->src);
+						rcg->parent_map, f_curr->src);
 		}
 
 		new_src_index = qcom_find_src_index(hw, rcg->parent_map,
diff --git a/drivers/clk/qcom/clk-rpmh.c b/drivers/clk/qcom/clk-rpmh.c
index bcee3ee..5e11485 100644
--- a/drivers/clk/qcom/clk-rpmh.c
+++ b/drivers/clk/qcom/clk-rpmh.c
@@ -277,9 +277,6 @@
 DEFINE_CLK_RPMH_ARC(sdm845, bi_tcxo, bi_tcxo_ao, "xo.lvl", 0x3, 0x0,
 		    &apps_rsc, 19200000, CLK_RPMH_APPS_RSC_STATE_MASK,
 		    CLK_RPMH_APPS_RSC_AO_STATE_MASK);
-DEFINE_CLK_RPMH_VRM(sdm845, ln_bb_clk1, ln_bb_clk1_ao, "lnbclka1", &apps_rsc,
-		    19200000, CLK_RPMH_APPS_RSC_STATE_MASK,
-		    CLK_RPMH_APPS_RSC_AO_STATE_MASK);
 DEFINE_CLK_RPMH_VRM(sdm845, ln_bb_clk2, ln_bb_clk2_ao, "lnbclka2", &apps_rsc,
 		    19200000, CLK_RPMH_APPS_RSC_STATE_MASK,
 		    CLK_RPMH_APPS_RSC_AO_STATE_MASK);
@@ -299,8 +296,6 @@
 static struct clk_hw *sdm845_rpmh_clocks[] = {
 	[RPMH_CXO_CLK]		= &sdm845_bi_tcxo.hw,
 	[RPMH_CXO_CLK_A]	= &sdm845_bi_tcxo_ao.hw,
-	[RPMH_LN_BB_CLK1]	= &sdm845_ln_bb_clk1.hw,
-	[RPMH_LN_BB_CLK1_A]	= &sdm845_ln_bb_clk1_ao.hw,
 	[RPMH_LN_BB_CLK2]	= &sdm845_ln_bb_clk2.hw,
 	[RPMH_LN_BB_CLK2_A]	= &sdm845_ln_bb_clk2_ao.hw,
 	[RPMH_LN_BB_CLK3]	= &sdm845_ln_bb_clk3.hw,
diff --git a/drivers/gpu/drm/msm/dsi-staging/dsi_display.c b/drivers/gpu/drm/msm/dsi-staging/dsi_display.c
index bcaf428..2c5bd76 100644
--- a/drivers/gpu/drm/msm/dsi-staging/dsi_display.c
+++ b/drivers/gpu/drm/msm/dsi-staging/dsi_display.c
@@ -1105,7 +1105,7 @@
 		goto error_disable_clks;
 	}
 
-	if (display->ctrl_count > 1) {
+	if (display->ctrl_count > 1 && !(msg->flags & MIPI_DSI_MSG_UNICAST)) {
 		rc = dsi_display_broadcast_cmd(display, msg);
 		if (rc) {
 			pr_err("[%s] cmd broadcast failed, rc=%d\n",
@@ -1113,7 +1113,10 @@
 			goto error_disable_cmd_engine;
 		}
 	} else {
-		rc = dsi_ctrl_cmd_transfer(display->ctrl[0].ctrl, msg,
+		int ctrl_idx = (msg->flags & MIPI_DSI_MSG_UNICAST) ?
+				msg->ctrl : 0;
+
+		rc = dsi_ctrl_cmd_transfer(display->ctrl[ctrl_idx].ctrl, msg,
 					  DSI_CTRL_CMD_FIFO_STORE);
 		if (rc) {
 			pr_err("[%s] cmd transfer failed, rc=%d\n",
@@ -3027,6 +3030,12 @@
 	return rc;
 }
 
+int dsi_display_pre_kickoff(struct dsi_display *display,
+		struct msm_display_kickoff_params *params)
+{
+	return 0;
+}
+
 int dsi_display_enable(struct dsi_display *display)
 {
 	int rc = 0;
diff --git a/drivers/gpu/drm/msm/dsi-staging/dsi_display.h b/drivers/gpu/drm/msm/dsi-staging/dsi_display.h
index cfbb14ec..d2bc7d8 100644
--- a/drivers/gpu/drm/msm/dsi-staging/dsi_display.h
+++ b/drivers/gpu/drm/msm/dsi-staging/dsi_display.h
@@ -417,4 +417,14 @@
  * Return: error code
  */
 int dsi_display_soft_reset(void *display);
+
+/*
+ * dsi_display_pre_kickoff - program kickoff-time features
+ * @display: Pointer to private display structure
+ * @params: Parameters for kickoff-time programming
+ * Returns: Zero on success
+ */
+int dsi_display_pre_kickoff(struct dsi_display *display,
+		struct msm_display_kickoff_params *params);
+
 #endif /* _DSI_DISPLAY_H_ */
diff --git a/drivers/gpu/drm/msm/dsi-staging/dsi_drm.c b/drivers/gpu/drm/msm/dsi-staging/dsi_drm.c
index 24a740b..556c0d8 100644
--- a/drivers/gpu/drm/msm/dsi-staging/dsi_drm.c
+++ b/drivers/gpu/drm/msm/dsi-staging/dsi_drm.c
@@ -475,6 +475,18 @@
 	return MODE_OK;
 }
 
+int dsi_conn_pre_kickoff(struct drm_connector *connector,
+		void *display,
+		struct msm_display_kickoff_params *params)
+{
+	if (!connector || !display || !params) {
+		pr_err("Invalid params\n");
+		return -EINVAL;
+	}
+
+	return dsi_display_pre_kickoff(display, params);
+}
+
 struct dsi_bridge *dsi_drm_bridge_init(struct dsi_display *display,
 				       struct drm_device *dev,
 				       struct drm_encoder *encoder)
diff --git a/drivers/gpu/drm/msm/dsi-staging/dsi_drm.h b/drivers/gpu/drm/msm/dsi-staging/dsi_drm.h
index 934899b..4339a11 100644
--- a/drivers/gpu/drm/msm/dsi-staging/dsi_drm.h
+++ b/drivers/gpu/drm/msm/dsi-staging/dsi_drm.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2016-2017, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -80,4 +80,15 @@
 
 void dsi_drm_bridge_cleanup(struct dsi_bridge *bridge);
 
+/**
+ * dsi_display_pre_kickoff - program kickoff-time features
+ * @connector: Pointer to drm connector structure
+ * @display: Pointer to private display structure
+ * @params: Parameters for kickoff-time programming
+ * Returns: Zero on success
+ */
+int dsi_conn_pre_kickoff(struct drm_connector *connector,
+		void *display,
+		struct msm_display_kickoff_params *params);
+
 #endif /* _DSI_DRM_H_ */
diff --git a/drivers/gpu/drm/msm/dsi-staging/dsi_panel.h b/drivers/gpu/drm/msm/dsi-staging/dsi_panel.h
index 57226ba..9f63089 100644
--- a/drivers/gpu/drm/msm/dsi-staging/dsi_panel.h
+++ b/drivers/gpu/drm/msm/dsi-staging/dsi_panel.h
@@ -109,6 +109,7 @@
 	enum dsi_cmd_set_type type;
 	enum dsi_cmd_set_state state;
 	u32 count;
+	int ctrl_idx;
 	struct dsi_cmd_desc *cmds;
 };
 
diff --git a/drivers/gpu/drm/msm/msm_drv.c b/drivers/gpu/drm/msm/msm_drv.c
index ced015f..d7b225b 100644
--- a/drivers/gpu/drm/msm/msm_drv.c
+++ b/drivers/gpu/drm/msm/msm_drv.c
@@ -15,6 +15,27 @@
  * You should have received a copy of the GNU General Public License along with
  * this program.  If not, see <http://www.gnu.org/licenses/>.
  */
+/*
+ * Copyright (c) 2016 Intel Corporation
+ *
+ * Permission to use, copy, modify, distribute, and sell this software and its
+ * documentation for any purpose is hereby granted without fee, provided that
+ * the above copyright notice appear in all copies and that both that copyright
+ * notice and this permission notice appear in supporting documentation, and
+ * that the name of the copyright holders not be used in advertising or
+ * publicity pertaining to distribution of the software without specific,
+ * written prior permission. The copyright holders make no representations
+ * about the suitability of this software for any purpose. It is provided "as
+ * is" without express or implied warranty.
+ *
+ * THE COPYRIGHT HOLDERS DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
+ * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO
+ * EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY SPECIAL, INDIRECT OR
+ * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE,
+ * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+ * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THIS SOFTWARE.
+ */
 
 #include <linux/of_address.h>
 #include <linux/kthread.h>
@@ -1331,6 +1352,109 @@
 	return drm_release(inode, filp);
 }
 
+/**
+ * msm_drv_framebuffer_remove - remove and unreference a framebuffer object
+ * @fb: framebuffer to remove
+ */
+void msm_drv_framebuffer_remove(struct drm_framebuffer *fb)
+{
+	struct drm_device *dev;
+
+	if (!fb)
+		return;
+
+	dev = fb->dev;
+
+	WARN_ON(!list_empty(&fb->filp_head));
+
+	drm_framebuffer_unreference(fb);
+}
+
+struct msm_drv_rmfb2_work {
+	struct work_struct work;
+	struct list_head fbs;
+};
+
+static void msm_drv_rmfb2_work_fn(struct work_struct *w)
+{
+	struct msm_drv_rmfb2_work *arg = container_of(w, typeof(*arg), work);
+
+	while (!list_empty(&arg->fbs)) {
+		struct drm_framebuffer *fb =
+			list_first_entry(&arg->fbs, typeof(*fb), filp_head);
+
+		list_del_init(&fb->filp_head);
+		msm_drv_framebuffer_remove(fb);
+	}
+}
+
+/**
+ * msm_ioctl_rmfb2 - remove an FB from the configuration
+ * @dev: drm device for the ioctl
+ * @data: data pointer for the ioctl
+ * @file_priv: drm file for the ioctl call
+ *
+ * Remove the FB specified by the user.
+ *
+ * Called by the user via ioctl.
+ *
+ * Returns:
+ * Zero on success, negative errno on failure.
+ */
+int msm_ioctl_rmfb2(struct drm_device *dev, void *data,
+		    struct drm_file *file_priv)
+{
+	struct drm_framebuffer *fb = NULL;
+	struct drm_framebuffer *fbl = NULL;
+	uint32_t *id = data;
+	int found = 0;
+
+	if (!drm_core_check_feature(dev, DRIVER_MODESET))
+		return -EINVAL;
+
+	fb = drm_framebuffer_lookup(dev, *id);
+	if (!fb)
+		return -ENOENT;
+
+	/* drop extra ref from traversing drm_framebuffer_lookup */
+	drm_framebuffer_unreference(fb);
+
+	mutex_lock(&file_priv->fbs_lock);
+	list_for_each_entry(fbl, &file_priv->fbs, filp_head)
+		if (fb == fbl)
+			found = 1;
+	if (!found) {
+		mutex_unlock(&file_priv->fbs_lock);
+		return -ENOENT;
+	}
+
+	list_del_init(&fb->filp_head);
+	mutex_unlock(&file_priv->fbs_lock);
+
+	/*
+	 * we now own the reference that was stored in the fbs list
+	 *
+	 * drm_framebuffer_remove may fail with -EINTR on pending signals,
+	 * so run this in a separate stack as there's no way to correctly
+	 * handle this after the fb is already removed from the lookup table.
+	 */
+	if (drm_framebuffer_read_refcount(fb) > 1) {
+		struct msm_drv_rmfb2_work arg;
+
+		INIT_WORK_ONSTACK(&arg.work, msm_drv_rmfb2_work_fn);
+		INIT_LIST_HEAD(&arg.fbs);
+		list_add_tail(&fb->filp_head, &arg.fbs);
+
+		schedule_work(&arg.work);
+		flush_work(&arg.work);
+		destroy_work_on_stack(&arg.work);
+	} else
+		drm_framebuffer_unreference(fb);
+
+	return 0;
+}
+EXPORT_SYMBOL(msm_ioctl_rmfb2);
+
 static const struct drm_ioctl_desc msm_ioctls[] = {
 	DRM_IOCTL_DEF_DRV(MSM_GET_PARAM,    msm_ioctl_get_param,    DRM_AUTH|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(MSM_GEM_NEW,      msm_ioctl_gem_new,      DRM_AUTH|DRM_RENDER_ALLOW),
@@ -1345,6 +1469,8 @@
 			  DRM_UNLOCKED|DRM_CONTROL_ALLOW),
 	DRM_IOCTL_DEF_DRV(MSM_DEREGISTER_EVENT,  msm_ioctl_deregister_event,
 			  DRM_UNLOCKED|DRM_CONTROL_ALLOW),
+	DRM_IOCTL_DEF_DRV(MSM_RMFB2, msm_ioctl_rmfb2,
+			  DRM_CONTROL_ALLOW|DRM_UNLOCKED),
 };
 
 static const struct vm_operations_struct vm_ops = {
diff --git a/drivers/gpu/drm/msm/msm_drv.h b/drivers/gpu/drm/msm/msm_drv.h
index da76fbc..4b263d3 100644
--- a/drivers/gpu/drm/msm/msm_drv.h
+++ b/drivers/gpu/drm/msm/msm_drv.h
@@ -363,6 +363,29 @@
 	struct msm_compression_info comp_info;
 };
 
+#define MSM_MAX_ROI	4
+
+/**
+ * struct msm_roi_mapping - Regions of interest structure for mapping CRTC to
+ *	Connector output
+ * @num_rects: number of valid rectangles in src and dst arrays
+ * @src: source roi rectangle
+ * @dst: destination roi rectangle
+ */
+struct msm_roi_mapping {
+	uint32_t num_rects;
+	struct drm_clip_rect src[MSM_MAX_ROI];
+	struct drm_clip_rect dst[MSM_MAX_ROI];
+};
+
+/**
+ * struct - msm_display_kickoff_params - info for display features at kickoff
+ * @rois: Regions of interest structure for mapping CRTC to Connector output
+ */
+struct msm_display_kickoff_params {
+	struct msm_roi_mapping *rois;
+};
+
 /**
  * struct msm_drm_event - defines custom event notification struct
  * @base: base object required for event notification by DRM framework.
diff --git a/drivers/gpu/drm/msm/sde/sde_connector.c b/drivers/gpu/drm/msm/sde/sde_connector.c
index 1f39180..6e793d9 100644
--- a/drivers/gpu/drm/msm/sde/sde_connector.c
+++ b/drivers/gpu/drm/msm/sde/sde_connector.c
@@ -215,6 +215,38 @@
 	return c_conn->ops.get_info(info, c_conn->display);
 }
 
+int sde_connector_pre_kickoff(struct drm_connector *connector)
+{
+	struct sde_connector *c_conn;
+	struct sde_connector_state *c_state;
+	struct msm_display_kickoff_params params;
+	int rc;
+
+	if (!connector) {
+		SDE_ERROR("invalid argument\n");
+		return -EINVAL;
+	}
+
+	c_conn = to_sde_connector(connector);
+	c_state = to_sde_connector_state(connector->state);
+
+	if (!c_conn->display) {
+		SDE_ERROR("invalid argument\n");
+		return -EINVAL;
+	}
+
+	if (!c_conn->ops.pre_kickoff)
+		return 0;
+
+	params.rois = &c_state->rois;
+
+	SDE_EVT32_VERBOSE(connector->base.id);
+
+	rc = c_conn->ops.pre_kickoff(connector, c_conn->display, &params);
+
+	return rc;
+}
+
 static void sde_connector_destroy(struct drm_connector *connector)
 {
 	struct sde_connector *c_conn;
diff --git a/drivers/gpu/drm/msm/sde/sde_connector.h b/drivers/gpu/drm/msm/sde/sde_connector.h
index 9d36851..70d4952 100644
--- a/drivers/gpu/drm/msm/sde/sde_connector.h
+++ b/drivers/gpu/drm/msm/sde/sde_connector.h
@@ -138,6 +138,18 @@
 	 * Return: Zero on success, -ERROR otherwise
 	 */
 	int (*soft_reset)(void *display);
+
+	/**
+	 * pre_kickoff - trigger display to program kickoff-time features
+	 * @connector: Pointer to drm connector structure
+	 * @display: Pointer to private display structure
+	 * @params: Parameter bundle of connector-stored information for
+	 *	kickoff-time programming into the display
+	 * Returns: Zero on success
+	 */
+	int (*pre_kickoff)(struct drm_connector *connector,
+			void *display,
+			struct msm_display_kickoff_params *params);
 };
 
 /**
@@ -253,12 +265,15 @@
  * @out_fb: Pointer to output frame buffer, if applicable
  * @mmu_id: MMU ID for accessing frame buffer objects, if applicable
  * @property_values: Local cache of current connector property values
+ * @rois: Regions of interest structure for mapping CRTC to Connector output
  */
 struct sde_connector_state {
 	struct drm_connector_state base;
 	struct drm_framebuffer *out_fb;
 	int mmu_id;
 	uint64_t property_values[CONNECTOR_PROP_COUNT];
+
+	struct msm_roi_mapping rois;
 };
 
 /**
@@ -403,5 +418,12 @@
 int sde_connector_register_custom_event(struct sde_kms *kms,
 		struct drm_connector *conn_drm, u32 event, bool en);
 
+/**
+ * sde_connector_pre_kickoff - trigger kickoff time feature programming
+ * @connector: Pointer to drm connector object
+ * Returns: Zero on success
+ */
+int sde_connector_pre_kickoff(struct drm_connector *connector);
+
 #endif /* _SDE_CONNECTOR_H_ */
 
diff --git a/drivers/gpu/drm/msm/sde/sde_core_irq.c b/drivers/gpu/drm/msm/sde/sde_core_irq.c
index 71a8bdf..5adef2d 100644
--- a/drivers/gpu/drm/msm/sde/sde_core_irq.c
+++ b/drivers/gpu/drm/msm/sde/sde_core_irq.c
@@ -320,7 +320,7 @@
 
 DEFINE_SDE_DEBUGFS_SEQ_FOPS(sde_debugfs_core_irq);
 
-static int sde_debugfs_core_irq_init(struct sde_kms *sde_kms,
+int sde_debugfs_core_irq_init(struct sde_kms *sde_kms,
 		struct dentry *parent)
 {
 	sde_kms->irq_obj.debugfs_file = debugfs_create_file("core_irq", 0644,
@@ -330,20 +330,20 @@
 	return 0;
 }
 
-static void sde_debugfs_core_irq_destroy(struct sde_kms *sde_kms)
+void sde_debugfs_core_irq_destroy(struct sde_kms *sde_kms)
 {
 	debugfs_remove(sde_kms->irq_obj.debugfs_file);
 	sde_kms->irq_obj.debugfs_file = NULL;
 }
 
 #else
-static int sde_debugfs_core_irq_init(struct sde_kms *sde_kms,
+int sde_debugfs_core_irq_init(struct sde_kms *sde_kms,
 		struct dentry *parent)
 {
 	return 0;
 }
 
-static void sde_debugfs_core_irq_destroy(struct sde_kms *sde_kms)
+void sde_debugfs_core_irq_destroy(struct sde_kms *sde_kms)
 {
 }
 #endif
@@ -385,8 +385,6 @@
 		atomic_set(&sde_kms->irq_obj.enable_counts[i], 0);
 		atomic_set(&sde_kms->irq_obj.irq_counts[i], 0);
 	}
-
-	sde_debugfs_core_irq_init(sde_kms, sde_debugfs_get_root(sde_kms));
 }
 
 int sde_core_irq_postinstall(struct sde_kms *sde_kms)
@@ -411,8 +409,6 @@
 	}
 	priv = sde_kms->dev->dev_private;
 
-	sde_debugfs_core_irq_destroy(sde_kms);
-
 	sde_power_resource_enable(&priv->phandle, sde_kms->core_client, true);
 	for (i = 0; i < sde_kms->irq_obj.total_irqs; i++)
 		if (atomic_read(&sde_kms->irq_obj.enable_counts[i]) ||
diff --git a/drivers/gpu/drm/msm/sde/sde_core_irq.h b/drivers/gpu/drm/msm/sde/sde_core_irq.h
index 92642e7..64f4160 100644
--- a/drivers/gpu/drm/msm/sde/sde_core_irq.h
+++ b/drivers/gpu/drm/msm/sde/sde_core_irq.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2015-2017, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -135,4 +135,19 @@
 		int irq_idx,
 		struct sde_irq_callback *irq_cb);
 
+/**
+ * sde_debugfs_core_irq_init - register core irq debugfs
+ * @sde_kms: pointer to kms
+ * @parent: debugfs directory root
+ * @Return: 0 on success
+ */
+int sde_debugfs_core_irq_init(struct sde_kms *sde_kms,
+		struct dentry *parent);
+
+/**
+ * sde_debugfs_core_irq_destroy - deregister core irq debugfs
+ * @sde_kms: pointer to kms
+ */
+void sde_debugfs_core_irq_destroy(struct sde_kms *sde_kms);
+
 #endif /* __SDE_CORE_IRQ_H__ */
diff --git a/drivers/gpu/drm/msm/sde/sde_crtc.c b/drivers/gpu/drm/msm/sde/sde_crtc.c
index 54acf41a..db637ad 100644
--- a/drivers/gpu/drm/msm/sde/sde_crtc.c
+++ b/drivers/gpu/drm/msm/sde/sde_crtc.c
@@ -628,8 +628,8 @@
 		split_dim_layer.flags = dim_layer->flags;
 		mixer_rect.x = i * mixer_width;
 
-		sde_kms_rect_intersect(&split_dim_layer.rect, &mixer_rect,
-					&dim_layer->rect);
+		sde_kms_rect_intersect(&mixer_rect, &dim_layer->rect,
+					&split_dim_layer.rect);
 		if (!split_dim_layer.rect.w && !split_dim_layer.rect.h) {
 			/*
 			 * no extra programming required for non-intersecting
@@ -2652,7 +2652,7 @@
 		return -EINVAL;
 
 	sde_crtc->debugfs_root = debugfs_create_dir(sde_crtc->name,
-			sde_debugfs_get_root(sde_kms));
+			crtc->dev->primary->debugfs_root);
 	if (!sde_crtc->debugfs_root)
 		return -ENOMEM;
 
diff --git a/drivers/gpu/drm/msm/sde/sde_encoder.c b/drivers/gpu/drm/msm/sde/sde_encoder.c
index 20d5e52..67fb783 100644
--- a/drivers/gpu/drm/msm/sde/sde_encoder.c
+++ b/drivers/gpu/drm/msm/sde/sde_encoder.c
@@ -1241,6 +1241,7 @@
 	struct sde_encoder_phys *phys;
 	bool needs_hw_reset = false;
 	unsigned int i;
+	int rc;
 
 	if (!drm_enc) {
 		SDE_ERROR("invalid encoder\n");
@@ -1270,6 +1271,14 @@
 				phys->ops.hw_reset(phys);
 		}
 	}
+
+	if (sde_enc->cur_master && sde_enc->cur_master->connector) {
+		rc = sde_connector_pre_kickoff(sde_enc->cur_master->connector);
+		if (rc)
+			SDE_ERROR_ENC(sde_enc, "kickoff conn%d failed rc %d\n",
+					sde_enc->cur_master->connector->base.id,
+					rc);
+	}
 }
 
 void sde_encoder_kickoff(struct drm_encoder *drm_enc)
@@ -1530,6 +1539,7 @@
 	struct sde_encoder_virt *sde_enc;
 	struct msm_drm_private *priv;
 	struct sde_kms *sde_kms;
+	int i;
 
 	static const struct file_operations debugfs_status_fops = {
 		.open =		_sde_encoder_debugfs_status_open,
@@ -1559,7 +1569,7 @@
 
 	/* create overall sub-directory for the encoder */
 	sde_enc->debugfs_root = debugfs_create_dir(name,
-					sde_debugfs_get_root(sde_kms));
+			drm_enc->dev->primary->debugfs_root);
 	if (!sde_enc->debugfs_root)
 		return -ENOMEM;
 
@@ -1570,6 +1580,13 @@
 	debugfs_create_file("misr_data", 0644,
 		sde_enc->debugfs_root, sde_enc, &debugfs_misr_fops);
 
+	for (i = 0; i < sde_enc->num_phys_encs; i++)
+		if (sde_enc->phys_encs[i] &&
+				sde_enc->phys_encs[i]->ops.late_register)
+			sde_enc->phys_encs[i]->ops.late_register(
+					sde_enc->phys_encs[i],
+					sde_enc->debugfs_root);
+
 	return 0;
 }
 
diff --git a/drivers/gpu/drm/msm/sde/sde_kms.c b/drivers/gpu/drm/msm/sde/sde_kms.c
index 7e18a0e..98c59c3 100644
--- a/drivers/gpu/drm/msm/sde/sde_kms.c
+++ b/drivers/gpu/drm/msm/sde/sde_kms.c
@@ -302,8 +302,9 @@
 	/* allow debugfs_root to be NULL */
 	debugfs_create_x32(SDE_DEBUGFS_HWMASKNAME, 0644, debugfs_root, p);
 
-	sde_debugfs_danger_init(sde_kms, debugfs_root);
-	sde_debugfs_vbif_init(sde_kms, debugfs_root);
+	(void) sde_debugfs_danger_init(sde_kms, debugfs_root);
+	(void) sde_debugfs_vbif_init(sde_kms, debugfs_root);
+	(void) sde_debugfs_core_irq_init(sde_kms, debugfs_root);
 
 	rc = sde_core_perf_debugfs_init(&sde_kms->perf, debugfs_root);
 	if (rc) {
@@ -320,6 +321,7 @@
 	if (sde_kms) {
 		sde_debugfs_vbif_destroy(sde_kms);
 		sde_debugfs_danger_destroy(sde_kms);
+		sde_debugfs_core_irq_destroy(sde_kms);
 	}
 }
 #else
@@ -549,7 +551,8 @@
 		.mode_valid = dsi_conn_mode_valid,
 		.get_info =   dsi_display_get_info,
 		.set_backlight = dsi_display_set_backlight,
-		.soft_reset   = dsi_display_soft_reset
+		.soft_reset   = dsi_display_soft_reset,
+		.pre_kickoff  = dsi_conn_pre_kickoff
 	};
 	static const struct sde_connector_ops wb_ops = {
 		.post_init =    sde_wb_connector_post_init,
diff --git a/drivers/gpu/drm/msm/sde/sde_kms.h b/drivers/gpu/drm/msm/sde/sde_kms.h
index ebc277e..d38a6b9 100644
--- a/drivers/gpu/drm/msm/sde/sde_kms.h
+++ b/drivers/gpu/drm/msm/sde/sde_kms.h
@@ -397,13 +397,47 @@
 void sde_kms_info_stop(struct sde_kms_info *info);
 
 /**
- * sde_kms_rect_intersect() - find the intersecting region between two rects
- * @res: Intersecting region between the two rectangles
- * @rect1: first rectangle coordinates
- * @rect2: second rectangle coordinates
+ * sde_kms_rect_intersect - intersect two rectangles
+ * @r1: first rectangle
+ * @r2: scissor rectangle
+ * @result: result rectangle, all 0's on no intersection found
  */
-void sde_kms_rect_intersect(struct sde_rect *res,
-		const struct sde_rect *rect1, const struct sde_rect *rect2);
+void sde_kms_rect_intersect(const struct sde_rect *r1,
+		const struct sde_rect *r2,
+		struct sde_rect *result);
+
+/**
+ * sde_kms_rect_is_equal - compares two rects
+ * @r1: rect value to compare
+ * @r2: rect value to compare
+ *
+ * Returns 1 if the rects are same, 0 otherwise.
+ */
+static inline bool sde_kms_rect_is_equal(struct sde_rect *r1,
+		struct sde_rect *r2)
+{
+	if ((!r1 && r2) || (r1 && !r2))
+		return false;
+
+	if (!r1 && !r2)
+		return true;
+
+	return r1->x == r2->x && r1->y == r2->y && r1->w == r2->w &&
+			r1->h == r2->h;
+}
+
+/**
+ * sde_kms_rect_is_null - returns true if the width or height of a rect is 0
+ * @rect: rectangle to check for zero size
+ * @Return: True if width or height of rectangle is 0
+ */
+static inline bool sde_kms_rect_is_null(const struct sde_rect *r)
+{
+	if (!r)
+		return true;
+
+	return (!r->w || !r->h);
+}
 
 /**
  * Vblank enable/disable functions
diff --git a/drivers/gpu/drm/msm/sde/sde_kms_utils.c b/drivers/gpu/drm/msm/sde/sde_kms_utils.c
index f95f5df..30e12c9 100644
--- a/drivers/gpu/drm/msm/sde/sde_kms_utils.c
+++ b/drivers/gpu/drm/msm/sde/sde_kms_utils.c
@@ -152,18 +152,26 @@
 	}
 }
 
-void sde_kms_rect_intersect(struct sde_rect *res,
-		const struct sde_rect *rect1, const struct sde_rect *rect2)
+void sde_kms_rect_intersect(const struct sde_rect *r1,
+		const struct sde_rect *r2,
+		struct sde_rect *result)
 {
 	int l, t, r, b;
 
-	l = max(rect1->x, rect2->x);
-	t = max(rect1->y, rect2->y);
-	r = min((rect1->x + rect1->w), (rect2->x + rect2->w));
-	b = min((rect1->y + rect1->h), (rect2->y + rect2->h));
+	if (!r1 || !r2 || !result)
+		return;
 
-	if (r < l || b < t)
-		*res = (struct sde_rect) {0, 0, 0, 0};
-	else
-		*res = (struct sde_rect) {l, t, (r - l), (b - t)};
+	l = max(r1->x, r2->x);
+	t = max(r1->y, r2->y);
+	r = min((r1->x + r1->w), (r2->x + r2->w));
+	b = min((r1->y + r1->h), (r2->y + r2->h));
+
+	if (r < l || b < t) {
+		memset(result, 0, sizeof(*result));
+	} else {
+		result->x = l;
+		result->y = t;
+		result->w = r - l;
+		result->h = b - t;
+	}
 }
diff --git a/drivers/gpu/drm/msm/sde/sde_plane.c b/drivers/gpu/drm/msm/sde/sde_plane.c
index e8892fb..bd6b302 100644
--- a/drivers/gpu/drm/msm/sde/sde_plane.c
+++ b/drivers/gpu/drm/msm/sde/sde_plane.c
@@ -2853,7 +2853,7 @@
 		 * Cropping is not required as hardware will consider only the
 		 * intersecting region with the src rect.
 		 */
-		sde_kms_rect_intersect(&intersect, &src, &pstate->excl_rect);
+		sde_kms_rect_intersect(&src, &pstate->excl_rect, &intersect);
 		if (!intersect.w || !intersect.h || SDE_FORMAT_IS_YUV(fmt)) {
 			SDE_ERROR_PLANE(psde,
 				"invalid excl_rect:{%d,%d,%d,%d} src:{%d,%d,%d,%d}, fmt: %4.4s\n",
@@ -3734,7 +3734,7 @@
 	/* create overall sub-directory for the pipe */
 	psde->debugfs_root =
 		debugfs_create_dir(psde->pipe_name,
-				sde_debugfs_get_root(kms));
+				plane->dev->primary->debugfs_root);
 
 	if (!psde->debugfs_root)
 		return -ENOMEM;
@@ -3953,7 +3953,7 @@
 	if (master_plane_id)
 		format_list = plane_formats;
 
-	psde->nformats = sde_populate_formats(plane_formats,
+	psde->nformats = sde_populate_formats(format_list,
 				psde->formats,
 				0,
 				ARRAY_SIZE(psde->formats));
diff --git a/drivers/hwtracing/coresight/coresight-etm4x.c b/drivers/hwtracing/coresight/coresight-etm4x.c
index 4fc5916..b04e8da 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x.c
@@ -1,4 +1,4 @@
-/* Copyright (c) 2014, 2016, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2014, 2016-2017, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -62,7 +62,7 @@
 static bool etm4_arch_supported(u8 arch)
 {
 	switch (arch) {
-	case ETM_ARCH_V4:
+	case ETM_ARCH_MAJOR_V4:
 		break;
 	default:
 		return false;
@@ -482,7 +482,7 @@
 	 * TRCARCHMIN, bits[7:4] architecture the minor version number
 	 * TRCARCHMAJ, bits[11:8] architecture major versin number
 	 */
-	drvdata->arch = BMVAL(etmidr1, 4, 11);
+	drvdata->arch = BMVAL(etmidr1, 8, 11);
 
 	/* maximum size of resources */
 	etmidr2 = readl_relaxed(drvdata->base + TRCIDR2);
@@ -528,8 +528,8 @@
 	else
 		drvdata->sysstall = false;
 
-	/* NUMPROC, bits[30:28] the number of PEs available for tracing */
-	drvdata->nr_pe = BMVAL(etmidr3, 28, 30);
+	/* NUMPROC, bits[13:12, 30:28] the number of PEs available for trace */
+	drvdata->nr_pe = (BMVAL(etmidr3, 12, 13) << 3) | BMVAL(etmidr3, 28, 30);
 
 	/* NOOVERFLOW, bit[31] is trace overflow prevention supported */
 	if (BMVAL(etmidr3, 31, 31))
diff --git a/drivers/hwtracing/coresight/coresight-etm4x.h b/drivers/hwtracing/coresight/coresight-etm4x.h
index ba8d3f8..4e51ecdc 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x.h
+++ b/drivers/hwtracing/coresight/coresight-etm4x.h
@@ -1,4 +1,4 @@
-/* Copyright (c) 2014-2015, The Linux Foundation. All rights reserved.
+/* Copyright (c) 2014-2015, 2017, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -143,7 +143,7 @@
 #define ETM_MAX_RES_SEL			16
 #define ETM_MAX_SS_CMP			8
 
-#define ETM_ARCH_V4			0x40
+#define ETM_ARCH_MAJOR_V4		0x4
 #define ETMv4_SYNC_MASK			0x1F
 #define ETM_CYC_THRESHOLD_MASK		0xFFF
 #define ETMv4_EVENT_MASK		0xFF
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c
index 2d2abe3..833f10d 100644
--- a/drivers/hwtracing/coresight/coresight-tmc-etr.c
+++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c
@@ -447,7 +447,9 @@
 	writel_relaxed(axictl, drvdata->base + TMC_AXICTL);
 
 	writel_relaxed(drvdata->paddr, drvdata->base + TMC_DBALO);
-	writel_relaxed(0x0, drvdata->base + TMC_DBAHI);
+	writel_relaxed(((u64)drvdata->paddr >> 32) & 0xFF,
+		       drvdata->base + TMC_DBAHI);
+
 	writel_relaxed(TMC_FFCR_EN_FMT | TMC_FFCR_EN_TI |
 		       TMC_FFCR_FON_FLIN | TMC_FFCR_FON_TRIG_EVT |
 		       TMC_FFCR_TRIGON_TRIGIN,
diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c
index 34df44c..9fcea85 100644
--- a/drivers/iommu/arm-smmu.c
+++ b/drivers/iommu/arm-smmu.c
@@ -3977,6 +3977,8 @@
 	void __iomem			*tcu_base;
 	u32				version;
 };
+#define get_qsmmuv500_archdata(smmu)				\
+	((struct qsmmuv500_archdata *)(smmu->archdata))
 
 struct qsmmuv500_tbu_device {
 	struct list_head		list;
@@ -3997,7 +3999,7 @@
 static int qsmmuv500_tbu_power_on_all(struct arm_smmu_device *smmu)
 {
 	struct qsmmuv500_tbu_device *tbu;
-	struct qsmmuv500_archdata *data = smmu->archdata;
+	struct qsmmuv500_archdata *data = get_qsmmuv500_archdata(smmu);
 	int ret = 0;
 
 	list_for_each_entry(tbu, &data->tbus, list) {
@@ -4017,7 +4019,7 @@
 static void qsmmuv500_tbu_power_off_all(struct arm_smmu_device *smmu)
 {
 	struct qsmmuv500_tbu_device *tbu;
-	struct qsmmuv500_archdata *data = smmu->archdata;
+	struct qsmmuv500_archdata *data = get_qsmmuv500_archdata(smmu);
 
 	list_for_each_entry_reverse(tbu, &data->tbus, list) {
 		arm_smmu_power_off(tbu->pwr);
@@ -4085,7 +4087,7 @@
 static int qsmmuv500_halt_all(struct arm_smmu_device *smmu)
 {
 	struct qsmmuv500_tbu_device *tbu;
-	struct qsmmuv500_archdata *data = smmu->archdata;
+	struct qsmmuv500_archdata *data = get_qsmmuv500_archdata(smmu);
 	int ret = 0;
 
 	list_for_each_entry(tbu, &data->tbus, list) {
@@ -4106,7 +4108,7 @@
 static void qsmmuv500_resume_all(struct arm_smmu_device *smmu)
 {
 	struct qsmmuv500_tbu_device *tbu;
-	struct qsmmuv500_archdata *data = smmu->archdata;
+	struct qsmmuv500_archdata *data = get_qsmmuv500_archdata(smmu);
 
 	list_for_each_entry(tbu, &data->tbus, list) {
 		qsmmuv500_tbu_resume(tbu);
@@ -4117,14 +4119,14 @@
 	struct arm_smmu_device *smmu, u32 sid)
 {
 	struct qsmmuv500_tbu_device *tbu = NULL;
-	struct qsmmuv500_archdata *data = smmu->archdata;
+	struct qsmmuv500_archdata *data = get_qsmmuv500_archdata(smmu);
 
 	list_for_each_entry(tbu, &data->tbus, list) {
 		if (tbu->sid_start <= sid &&
 		    sid < tbu->sid_start + tbu->num_sids)
-			break;
+			return tbu;
 	}
-	return tbu;
+	return NULL;
 }
 
 static void qsmmuv500_device_reset(struct arm_smmu_device *smmu)
@@ -4150,7 +4152,7 @@
 				unsigned long *flags)
 {
 	struct arm_smmu_device *smmu = tbu->smmu;
-	struct qsmmuv500_archdata *data = smmu->archdata;
+	struct qsmmuv500_archdata *data = get_qsmmuv500_archdata(smmu);
 	u32 val;
 
 	spin_lock_irqsave(&smmu->atos_lock, *flags);
@@ -4174,7 +4176,7 @@
 					unsigned long *flags)
 {
 	struct arm_smmu_device *smmu = tbu->smmu;
-	struct qsmmuv500_archdata *data = smmu->archdata;
+	struct qsmmuv500_archdata *data = get_qsmmuv500_archdata(smmu);
 
 	/* The status register is not accessible on version 1.0 */
 	if (data->version != 0x01000000)
@@ -4326,11 +4328,11 @@
 	return qsmmuv500_iova_to_phys(domain, iova, sid);
 }
 
-static int qsmmuv500_tbu_register(struct device *dev, void *data)
+static int qsmmuv500_tbu_register(struct device *dev, void *cookie)
 {
-	struct arm_smmu_device *smmu = data;
+	struct arm_smmu_device *smmu = cookie;
 	struct qsmmuv500_tbu_device *tbu;
-	struct list_head *list = smmu->archdata;
+	struct qsmmuv500_archdata *data = get_qsmmuv500_archdata(smmu);
 
 	if (!dev->driver) {
 		dev_err(dev, "TBU failed probe, QSMMUV500 cannot continue!\n");
@@ -4341,7 +4343,7 @@
 
 	INIT_LIST_HEAD(&tbu->list);
 	tbu->smmu = smmu;
-	list_add(&tbu->list, list);
+	list_add(&tbu->list, &data->tbus);
 	return 0;
 }
 
diff --git a/drivers/mailbox/qti-tcs.c b/drivers/mailbox/qti-tcs.c
index 1c73c5a2..5efab6c 100644
--- a/drivers/mailbox/qti-tcs.c
+++ b/drivers/mailbox/qti-tcs.c
@@ -440,7 +440,6 @@
 	struct tcs_mbox_msg *msg = resp->msg;
 	struct tcs_drv *drv = resp->drv;
 	int m = resp->m;
-	int err = -EIO;
 
 	/*
 	 * In case the RPMH resource fails to respond to the completion
@@ -471,7 +470,7 @@
 	}
 
 	free_response_to_pool(resp);
-	mbox_notify_tx_done(chan, msg, -1, err);
+	mbox_notify_tx_done(chan, msg, -1, -ETIMEDOUT);
 }
 
 static void __tcs_buffer_write(struct tcs_drv *drv, int d, int m, int n,
diff --git a/drivers/media/platform/msm/vidc/msm_venc.c b/drivers/media/platform/msm/vidc/msm_venc.c
index 7bfd255..dccfe74 100644
--- a/drivers/media/platform/msm/vidc/msm_venc.c
+++ b/drivers/media/platform/msm/vidc/msm_venc.c
@@ -352,20 +352,6 @@
 		.qmenu = mpeg_video_rate_control,
 	},
 	{
-		.id = V4L2_CID_MPEG_VIDEO_BITRATE_MODE,
-		.name = "Bitrate Control",
-		.type = V4L2_CTRL_TYPE_MENU,
-		.minimum = V4L2_MPEG_VIDEO_BITRATE_MODE_VBR,
-		.maximum = V4L2_MPEG_VIDEO_BITRATE_MODE_CBR,
-		.default_value = V4L2_MPEG_VIDEO_BITRATE_MODE_VBR,
-		.step = 0,
-		.menu_skip_mask = ~(
-		(1 << V4L2_MPEG_VIDEO_BITRATE_MODE_VBR) |
-		(1 << V4L2_MPEG_VIDEO_BITRATE_MODE_CBR)
-		),
-		.qmenu = NULL,
-	},
-	{
 		.id = V4L2_CID_MPEG_VIDEO_BITRATE,
 		.name = "Bit Rate",
 		.type = V4L2_CTRL_TYPE_INTEGER,
@@ -1269,62 +1255,10 @@
 		pdata = &request_iframe;
 		break;
 	case V4L2_CID_MPEG_VIDC_VIDEO_RATE_CONTROL:
-	case V4L2_CID_MPEG_VIDEO_BITRATE_MODE:
 	{
-		int final_mode = 0;
-		struct v4l2_ctrl update_ctrl;
-
-		update_ctrl.id = 0;
-		update_ctrl.val = 0;
-
-		/* V4L2_CID_MPEG_VIDEO_BITRATE_MODE and _RATE_CONTROL
-		 * manipulate the same thing.  If one control's state
-		 * changes, try to mirror the state in the other control's
-		 * value
-		 */
-		if (ctrl->id == V4L2_CID_MPEG_VIDEO_BITRATE_MODE) {
-			if (ctrl->val == V4L2_MPEG_VIDEO_BITRATE_MODE_VBR) {
-				final_mode = HAL_RATE_CONTROL_VBR_CFR;
-				update_ctrl.val =
-				V4L2_CID_MPEG_VIDC_VIDEO_RATE_CONTROL_VBR_CFR;
-			} else {/* ...if (ctrl->val == _BITRATE_MODE_CBR) */
-				final_mode = HAL_RATE_CONTROL_CBR_CFR;
-				update_ctrl.val =
-				V4L2_CID_MPEG_VIDC_VIDEO_RATE_CONTROL_CBR_CFR;
-			}
-
-			update_ctrl.id = V4L2_CID_MPEG_VIDC_VIDEO_RATE_CONTROL;
-
-		} else if (ctrl->id == V4L2_CID_MPEG_VIDC_VIDEO_RATE_CONTROL) {
-			switch (ctrl->val) {
-			case V4L2_CID_MPEG_VIDC_VIDEO_RATE_CONTROL_OFF:
-			case V4L2_CID_MPEG_VIDC_VIDEO_RATE_CONTROL_VBR_VFR:
-			case V4L2_CID_MPEG_VIDC_VIDEO_RATE_CONTROL_VBR_CFR:
-				update_ctrl.val =
-					V4L2_MPEG_VIDEO_BITRATE_MODE_VBR;
-				break;
-			case V4L2_CID_MPEG_VIDC_VIDEO_RATE_CONTROL_CBR_VFR:
-			case V4L2_CID_MPEG_VIDC_VIDEO_RATE_CONTROL_CBR_CFR:
-			case V4L2_CID_MPEG_VIDC_VIDEO_RATE_CONTROL_MBR_CFR:
-			case V4L2_CID_MPEG_VIDC_VIDEO_RATE_CONTROL_MBR_VFR:
-				update_ctrl.val =
-					V4L2_MPEG_VIDEO_BITRATE_MODE_CBR;
-				break;
-			}
-
-			final_mode = ctrl->val;
-			update_ctrl.id = V4L2_CID_MPEG_VIDEO_BITRATE_MODE;
-		}
-
-		if (update_ctrl.id) {
-			temp_ctrl = TRY_GET_CTRL(update_ctrl.id);
-			temp_ctrl->val = update_ctrl.val;
-		}
-
 		property_id = HAL_PARAM_VENC_RATE_CONTROL;
-		property_val = final_mode;
+		property_val = ctrl->val;
 		pdata = &property_val;
-
 		break;
 	}
 	case V4L2_CID_MPEG_VIDEO_BITRATE:
diff --git a/drivers/media/platform/msm/vidc/msm_vidc_common.c b/drivers/media/platform/msm/vidc/msm_vidc_common.c
index f002e76..8f77ebb 100644
--- a/drivers/media/platform/msm/vidc/msm_vidc_common.c
+++ b/drivers/media/platform/msm/vidc/msm_vidc_common.c
@@ -41,6 +41,8 @@
 	"Extradata none",
 	"Extradata MB Quantization",
 	"Extradata Interlace Video",
+	"Reserved",
+	"Reserved",
 	"Extradata timestamp",
 	"Extradata S3D Frame Packing",
 	"Extradata Frame Rate",
diff --git a/drivers/mmc/Kconfig b/drivers/mmc/Kconfig
index f2eeb38..9116551 100644
--- a/drivers/mmc/Kconfig
+++ b/drivers/mmc/Kconfig
@@ -19,6 +19,14 @@
 	  This is an option for use by developers; most people should
 	  say N here.  This enables MMC core and driver debugging.
 
+config MMC_PERF_PROFILING
+	bool "MMC performance profiling"
+	depends on MMC != n
+	default n
+	help
+	  If you say Y here, support will be added for collecting
+	  performance numbers at the MMC Queue and Host layers.
+
 if MMC
 
 source "drivers/mmc/core/Kconfig"
diff --git a/drivers/mmc/card/Kconfig b/drivers/mmc/card/Kconfig
index 6142ec1..91f2445 100644
--- a/drivers/mmc/card/Kconfig
+++ b/drivers/mmc/card/Kconfig
@@ -50,6 +50,17 @@
 
 	  If unsure, say Y here.
 
+config MMC_BLOCK_DEFERRED_RESUME
+	bool "Defer MMC layer resume until I/O is requested"
+	depends on MMC_BLOCK
+	default n
+	help
+	  Say Y here to enable deferred MMC resume until I/O
+	  is requested.
+
+	  This will reduce overall resume latency and
+	  save power when there is an SD card inserted but not being used.
+
 config SDIO_UART
 	tristate "SDIO UART/GPS class support"
 	depends on TTY
diff --git a/drivers/mmc/card/Makefile b/drivers/mmc/card/Makefile
index c73b406..d55107f 100644
--- a/drivers/mmc/card/Makefile
+++ b/drivers/mmc/card/Makefile
@@ -8,3 +8,4 @@
 
 obj-$(CONFIG_SDIO_UART)		+= sdio_uart.o
 
+obj-$(CONFIG_MMC_BLOCK_TEST)		+= mmc_block_test.o
diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c
index ace47ae..dba5c41 100644
--- a/drivers/mmc/card/block.c
+++ b/drivers/mmc/card/block.c
@@ -30,15 +30,18 @@
 #include <linux/blkdev.h>
 #include <linux/mutex.h>
 #include <linux/scatterlist.h>
+#include <linux/bitops.h>
 #include <linux/string_helpers.h>
 #include <linux/delay.h>
 #include <linux/capability.h>
 #include <linux/compat.h>
 #include <linux/pm_runtime.h>
+#include <linux/ioprio.h>
 #include <linux/idr.h>
 
 #include <linux/mmc/ioctl.h>
 #include <linux/mmc/card.h>
+#include <linux/mmc/core.h>
 #include <linux/mmc/host.h>
 #include <linux/mmc/mmc.h>
 #include <linux/mmc/sd.h>
@@ -60,15 +63,33 @@
 #define INAND_CMD38_ARG_SECERASE 0x80
 #define INAND_CMD38_ARG_SECTRIM1 0x81
 #define INAND_CMD38_ARG_SECTRIM2 0x88
-#define MMC_BLK_TIMEOUT_MS  (10 * 60 * 1000)        /* 10 minute timeout */
+#define MMC_BLK_TIMEOUT_MS  (30 * 1000)        /* 30 sec timeout */
 #define MMC_SANITIZE_REQ_TIMEOUT 240000
 #define MMC_EXTRACT_INDEX_FROM_ARG(x) ((x & 0x00FF0000) >> 16)
+#define MMC_CMDQ_STOP_TIMEOUT_MS 100
 
 #define mmc_req_rel_wr(req)	((req->cmd_flags & REQ_FUA) && \
 				  (rq_data_dir(req) == WRITE))
 #define PACKED_CMD_VER	0x01
 #define PACKED_CMD_WR	0x02
+#define PACKED_TRIGGER_MAX_ELEMENTS	5000
 
+#define MMC_BLK_MAX_RETRIES 5 /* max # of retries before aborting a command */
+#define MMC_BLK_UPDATE_STOP_REASON(stats, reason)			\
+	do {								\
+		if (stats->enabled)					\
+			stats->pack_stop_reason[reason]++;		\
+	} while (0)
+
+#define MAX_RETRIES 5
+#define PCKD_TRGR_INIT_MEAN_POTEN	17
+#define PCKD_TRGR_POTEN_LOWER_BOUND	5
+#define PCKD_TRGR_URGENT_PENALTY	2
+#define PCKD_TRGR_LOWER_BOUND		5
+#define PCKD_TRGR_PRECISION_MULTIPLIER	100
+
+static struct mmc_cmdq_req *mmc_cmdq_prep_dcmd(
+		struct mmc_queue_req *mqrq, struct mmc_queue *mq);
 static DEFINE_MUTEX(block_mutex);
 
 /*
@@ -103,6 +124,7 @@
 #define MMC_BLK_CMD23	(1 << 0)	/* Can do SET_BLOCK_COUNT for multiblock */
 #define MMC_BLK_REL_WR	(1 << 1)	/* MMC Reliable write support */
 #define MMC_BLK_PACKED_CMD	(1 << 2)	/* MMC packed command support */
+#define MMC_BLK_CMD_QUEUE	(1 << 3) /* MMC command queue support */
 
 	unsigned int	usage;
 	unsigned int	read_only;
@@ -112,6 +134,8 @@
 #define MMC_BLK_WRITE		BIT(1)
 #define MMC_BLK_DISCARD		BIT(2)
 #define MMC_BLK_SECDISCARD	BIT(3)
+#define MMC_BLK_FLUSH		BIT(4)
+#define MMC_BLK_PARTSWITCH	BIT(5)
 
 	/*
 	 * Only set in main mmc_blk_data associated
@@ -121,6 +145,8 @@
 	unsigned int	part_curr;
 	struct device_attribute force_ro;
 	struct device_attribute power_ro_lock;
+	struct device_attribute num_wr_reqs_to_start_packing;
+	struct device_attribute no_pack_for_random;
 	int	area_type;
 };
 
@@ -138,6 +164,8 @@
 static inline int mmc_blk_part_switch(struct mmc_card *card,
 				      struct mmc_blk_data *md);
 static int get_card_status(struct mmc_card *card, u32 *status, int retries);
+static int mmc_blk_cmdq_switch(struct mmc_card *card,
+			       struct mmc_blk_data *md, bool enable);
 
 static inline void mmc_blk_clear_packed(struct mmc_queue_req *mqrq)
 {
@@ -194,9 +222,13 @@
 {
 	int ret;
 	struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev));
-	struct mmc_card *card = md->queue.card;
+	struct mmc_card *card;
 	int locked = 0;
 
+	if (!md)
+		return -EINVAL;
+
+	card = md->queue.card;
 	if (card->ext_csd.boot_ro_lock & EXT_CSD_BOOT_WP_B_PERM_WP_EN)
 		locked = 2;
 	else if (card->ext_csd.boot_ro_lock & EXT_CSD_BOOT_WP_B_PWR_WP_EN)
@@ -224,6 +256,8 @@
 		return count;
 
 	md = mmc_blk_get(dev_to_disk(dev));
+	if (!md)
+		return -EINVAL;
 	card = md->queue.card;
 
 	mmc_get_card(card);
@@ -261,6 +295,9 @@
 	int ret;
 	struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev));
 
+	if (!md)
+		return -EINVAL;
+
 	ret = snprintf(buf, PAGE_SIZE, "%d\n",
 		       get_disk_ro(dev_to_disk(dev)) ^
 		       md->read_only);
@@ -275,6 +312,10 @@
 	char *end;
 	struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev));
 	unsigned long set = simple_strtoul(buf, &end, 0);
+
+	if (!md)
+		return -EINVAL;
+
 	if (end == buf) {
 		ret = -EINVAL;
 		goto out;
@@ -287,6 +328,119 @@
 	return ret;
 }
 
+static ssize_t
+no_pack_for_random_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev));
+	int ret;
+
+	if (!md)
+		return -EINVAL;
+	ret = snprintf(buf, PAGE_SIZE, "%d\n", md->queue.no_pack_for_random);
+
+	mmc_blk_put(md);
+	return ret;
+}
+
+static ssize_t
+no_pack_for_random_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	int value;
+	struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev));
+	struct mmc_card *card;
+	int ret = count;
+
+	if (!md)
+		return -EINVAL;
+
+	card = md->queue.card;
+	if (!card) {
+		ret = -EINVAL;
+		goto exit;
+	}
+
+	sscanf(buf, "%d", &value);
+
+	if (value < 0) {
+		pr_err("%s: value %d is not valid. old value remains = %d",
+			mmc_hostname(card->host), value,
+			md->queue.no_pack_for_random);
+		ret = -EINVAL;
+		goto exit;
+	}
+
+	md->queue.no_pack_for_random = (value > 0) ?  true : false;
+
+	pr_debug("%s: no_pack_for_random: new value = %d",
+		mmc_hostname(card->host),
+		md->queue.no_pack_for_random);
+
+exit:
+	mmc_blk_put(md);
+	return ret;
+}
+
+static ssize_t
+num_wr_reqs_to_start_packing_show(struct device *dev,
+				  struct device_attribute *attr, char *buf)
+{
+	struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev));
+	int num_wr_reqs_to_start_packing;
+	int ret;
+
+	if (!md)
+		return -EINVAL;
+	num_wr_reqs_to_start_packing = md->queue.num_wr_reqs_to_start_packing;
+
+	ret = snprintf(buf, PAGE_SIZE, "%d\n", num_wr_reqs_to_start_packing);
+
+	mmc_blk_put(md);
+	return ret;
+}
+
+static ssize_t
+num_wr_reqs_to_start_packing_store(struct device *dev,
+				 struct device_attribute *attr,
+				 const char *buf, size_t count)
+{
+	int value;
+	struct mmc_blk_data *md = mmc_blk_get(dev_to_disk(dev));
+	struct mmc_card *card;
+	int ret = count;
+
+	if (!md)
+		return -EINVAL;
+
+	card = md->queue.card;
+	if (!card) {
+		ret = -EINVAL;
+		goto exit;
+	}
+
+	sscanf(buf, "%d", &value);
+
+	if (value >= 0) {
+		md->queue.num_wr_reqs_to_start_packing =
+		    min_t(int, value, (int)card->ext_csd.max_packed_writes);
+
+		pr_debug("%s: trigger to pack: new value = %d",
+			mmc_hostname(card->host),
+			md->queue.num_wr_reqs_to_start_packing);
+	} else {
+		pr_err("%s: value %d is not valid. old value remains = %d",
+			mmc_hostname(card->host), value,
+			md->queue.num_wr_reqs_to_start_packing);
+		ret = -EINVAL;
+	}
+
+exit:
+	mmc_blk_put(md);
+	return ret;
+}
+
 #ifdef CONFIG_MMC_SIMULATE_MAX_SPEED
 
 static int max_read_speed, max_write_speed, cache_size = 4;
@@ -680,11 +834,12 @@
 {
 	int err;
 
-	if (!mmc_can_sanitize(card)) {
-			pr_warn("%s: %s - SANITIZE is not supported\n",
+	if (!mmc_can_sanitize(card) &&
+			(card->host->caps2 & MMC_CAP2_SANITIZE)) {
+		pr_warn("%s: %s - SANITIZE is not supported\n",
 				mmc_hostname(card->host), __func__);
-			err = -EOPNOTSUPP;
-			goto out;
+		err = -EOPNOTSUPP;
+		goto out;
 	}
 
 	pr_debug("%s: %s - SANITIZE IN PROGRESS...\n",
@@ -712,15 +867,10 @@
 	struct mmc_request mrq = {NULL};
 	struct scatterlist sg;
 	int err;
-	int is_rpmb = false;
-	u32 status = 0;
 
 	if (!card || !md || !idata)
 		return -EINVAL;
 
-	if (md->area_type & MMC_BLK_DATA_AREA_RPMB)
-		is_rpmb = true;
-
 	cmd.opcode = idata->ic.opcode;
 	cmd.arg = idata->ic.arg;
 	cmd.flags = idata->ic.flags;
@@ -763,6 +913,15 @@
 
 	mrq.cmd = &cmd;
 
+	if (mmc_card_doing_bkops(card)) {
+		err = mmc_stop_bkops(card);
+		if (err) {
+			dev_err(mmc_dev(card->host),
+				"%s: stop_bkops failed %d\n", __func__, err);
+			return err;
+		}
+	}
+
 	err = mmc_blk_part_switch(card, md);
 	if (err)
 		return err;
@@ -773,13 +932,6 @@
 			return err;
 	}
 
-	if (is_rpmb) {
-		err = mmc_set_blockcount(card, data.blocks,
-			idata->ic.write_flag & (1 << 31));
-		if (err)
-			return err;
-	}
-
 	if ((MMC_EXTRACT_INDEX_FROM_ARG(cmd.arg) == EXT_CSD_SANITIZE_START) &&
 	    (cmd.opcode == MMC_SWITCH)) {
 		err = ioctl_do_sanitize(card);
@@ -813,7 +965,183 @@
 
 	memcpy(&(idata->ic.response), cmd.resp, sizeof(cmd.resp));
 
-	if (is_rpmb) {
+	return err;
+}
+
+struct mmc_blk_ioc_rpmb_data {
+	struct mmc_blk_ioc_data *data[MMC_IOC_MAX_RPMB_CMD];
+};
+
+static struct mmc_blk_ioc_rpmb_data *mmc_blk_ioctl_rpmb_copy_from_user(
+	struct mmc_ioc_rpmb __user *user)
+{
+	struct mmc_blk_ioc_rpmb_data *idata;
+	int err, i;
+
+	idata = kzalloc(sizeof(*idata), GFP_KERNEL);
+	if (!idata) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	for (i = 0; i < MMC_IOC_MAX_RPMB_CMD; i++) {
+		idata->data[i] = mmc_blk_ioctl_copy_from_user(&(user->cmds[i]));
+		if (IS_ERR(idata->data[i])) {
+			err = PTR_ERR(idata->data[i]);
+			goto copy_err;
+		}
+	}
+
+	return idata;
+
+copy_err:
+	while (--i >= 0) {
+		kfree(idata->data[i]->buf);
+		kfree(idata->data[i]);
+	}
+	kfree(idata);
+out:
+	return ERR_PTR(err);
+}
+
+static int mmc_blk_ioctl_rpmb_cmd(struct block_device *bdev,
+	struct mmc_ioc_rpmb __user *ic_ptr)
+{
+	struct mmc_blk_ioc_rpmb_data *idata;
+	struct mmc_blk_data *md;
+	struct mmc_card *card = NULL;
+	struct mmc_command cmd = {0};
+	struct mmc_data data = {0};
+	struct mmc_request mrq = {NULL};
+	struct scatterlist sg;
+	int err = 0, i = 0;
+	u32 status = 0;
+
+	/* The caller must have CAP_SYS_RAWIO */
+	if (!capable(CAP_SYS_RAWIO))
+		return -EPERM;
+
+	md = mmc_blk_get(bdev->bd_disk);
+	/* make sure this is a rpmb partition */
+	if ((!md) || (!(md->area_type & MMC_BLK_DATA_AREA_RPMB))) {
+		err = -EINVAL;
+		return err;
+	}
+
+	idata = mmc_blk_ioctl_rpmb_copy_from_user(ic_ptr);
+	if (IS_ERR(idata)) {
+		err = PTR_ERR(idata);
+		goto cmd_done;
+	}
+
+	card = md->queue.card;
+	if (IS_ERR(card)) {
+		err = PTR_ERR(card);
+		goto idata_free;
+	}
+
+	mmc_get_card(card);
+
+	if (mmc_card_doing_bkops(card)) {
+		if (mmc_card_cmdq(card)) {
+			err = mmc_cmdq_halt(card->host, true);
+			if (err)
+				goto cmd_rel_host;
+		}
+		err = mmc_stop_bkops(card);
+		if (err) {
+			dev_err(mmc_dev(card->host),
+				"%s: stop_bkops failed %d\n", __func__, err);
+			goto cmd_rel_host;
+		}
+		if (mmc_card_cmdq(card)) {
+			err = mmc_cmdq_halt(card->host, false);
+			if (err)
+				goto cmd_rel_host;
+		}
+	}
+
+	err = mmc_blk_part_switch(card, md);
+	if (err)
+		goto cmd_rel_host;
+
+	for (i = 0; i < MMC_IOC_MAX_RPMB_CMD; i++) {
+		struct mmc_blk_ioc_data *curr_data;
+		struct mmc_ioc_cmd *curr_cmd;
+
+		curr_data = idata->data[i];
+		curr_cmd = &curr_data->ic;
+		if (!curr_cmd->opcode)
+			break;
+
+		cmd.opcode = curr_cmd->opcode;
+		cmd.arg = curr_cmd->arg;
+		cmd.flags = curr_cmd->flags;
+
+		if (curr_data->buf_bytes) {
+			data.sg = &sg;
+			data.sg_len = 1;
+			data.blksz = curr_cmd->blksz;
+			data.blocks = curr_cmd->blocks;
+
+			sg_init_one(data.sg, curr_data->buf,
+					curr_data->buf_bytes);
+
+			if (curr_cmd->write_flag)
+				data.flags = MMC_DATA_WRITE;
+			else
+				data.flags = MMC_DATA_READ;
+
+			/* data.flags must already be set before doing this. */
+			mmc_set_data_timeout(&data, card);
+
+			/*
+			 * Allow overriding the timeout_ns for empirical tuning.
+			 */
+			if (curr_cmd->data_timeout_ns)
+				data.timeout_ns = curr_cmd->data_timeout_ns;
+
+			mrq.data = &data;
+		}
+
+		mrq.cmd = &cmd;
+
+		err = mmc_set_blockcount(card, data.blocks,
+				curr_cmd->write_flag & (1 << 31));
+		if (err)
+			goto cmd_rel_host;
+
+		mmc_wait_for_req(card->host, &mrq);
+
+		if (cmd.error) {
+			dev_err(mmc_dev(card->host), "%s: cmd error %d\n",
+					__func__, cmd.error);
+			err = cmd.error;
+			goto cmd_rel_host;
+		}
+		if (data.error) {
+			dev_err(mmc_dev(card->host), "%s: data error %d\n",
+					__func__, data.error);
+			err = data.error;
+			goto cmd_rel_host;
+		}
+
+		if (copy_to_user(&(ic_ptr->cmds[i].response), cmd.resp,
+					sizeof(cmd.resp))) {
+			err = -EFAULT;
+			goto cmd_rel_host;
+		}
+
+		if (!curr_cmd->write_flag) {
+			if (copy_to_user((void __user *)(unsigned long)
+						curr_cmd->data_ptr,
+						curr_data->buf,
+						curr_data->buf_bytes)) {
+				err = -EFAULT;
+				goto cmd_rel_host;
+			}
+		}
+
 		/*
 		 * Ensure RPMB command has completed by polling CMD13
 		 * "Send Status".
@@ -825,6 +1153,20 @@
 					__func__, status, err);
 	}
 
+cmd_rel_host:
+	mmc_put_card(card);
+
+idata_free:
+	for (i = 0; i < MMC_IOC_MAX_RPMB_CMD; i++) {
+		kfree(idata->data[i]->buf);
+		kfree(idata->data[i]);
+	}
+	kfree(idata);
+
+cmd_done:
+	mmc_blk_put(md);
+	if (card && card->cmdq_init)
+		wake_up(&card->host->cmdq_ctx.wait);
 	return err;
 }
 
@@ -845,7 +1187,7 @@
 		return -EPERM;
 
 	idata = mmc_blk_ioctl_copy_from_user(ic_ptr);
-	if (IS_ERR(idata))
+	if (IS_ERR_OR_NULL(idata))
 		return PTR_ERR(idata);
 
 	md = mmc_blk_get(bdev->bd_disk);
@@ -855,13 +1197,24 @@
 	}
 
 	card = md->queue.card;
-	if (IS_ERR(card)) {
+	if (IS_ERR_OR_NULL(card)) {
 		err = PTR_ERR(card);
 		goto cmd_done;
 	}
 
 	mmc_get_card(card);
 
+	if (mmc_card_cmdq(card)) {
+		err = mmc_cmdq_halt_on_empty_queue(card->host);
+		if (err) {
+			pr_err("%s: halt failed while doing %s err (%d)\n",
+					mmc_hostname(card->host),
+					__func__, err);
+			mmc_put_card(card);
+			goto cmd_done;
+		}
+	}
+
 	ioc_err = __mmc_blk_ioctl_cmd(card, md, idata);
 
 	/* Always switch back to main area after RPMB access */
@@ -872,6 +1225,12 @@
 
 	err = mmc_blk_ioctl_copy_to_user(ic_ptr, idata);
 
+	if (mmc_card_cmdq(card)) {
+		if (mmc_cmdq_halt(card->host, false))
+			pr_err("%s: %s: cmdq unhalt failed\n",
+			       mmc_hostname(card->host), __func__);
+	}
+
 cmd_done:
 	mmc_blk_put(md);
 cmd_err:
@@ -963,6 +1322,9 @@
 	case MMC_IOC_CMD:
 		return mmc_blk_ioctl_cmd(bdev,
 				(struct mmc_ioc_cmd __user *)arg);
+	case MMC_IOC_RPMB_CMD:
+		return mmc_blk_ioctl_rpmb_cmd(bdev,
+				(struct mmc_ioc_rpmb __user *)arg);
 	case MMC_IOC_MULTI_CMD:
 		return mmc_blk_ioctl_multi_cmd(bdev,
 				(struct mmc_ioc_multi_cmd __user *)arg);
@@ -990,18 +1352,76 @@
 #endif
 };
 
+static int mmc_blk_cmdq_switch(struct mmc_card *card,
+			       struct mmc_blk_data *md, bool enable)
+{
+	int ret = 0;
+	bool cmdq_mode = !!mmc_card_cmdq(card);
+	struct mmc_host *host = card->host;
+	struct mmc_cmdq_context_info *ctx = &host->cmdq_ctx;
+
+	if (!(card->host->caps2 & MMC_CAP2_CMD_QUEUE) ||
+	    !card->ext_csd.cmdq_support ||
+	    (enable && !(md->flags & MMC_BLK_CMD_QUEUE)) ||
+	    (cmdq_mode == enable))
+		return 0;
+
+	if (enable) {
+		ret = mmc_set_blocklen(card, MMC_CARD_CMDQ_BLK_SIZE);
+		if (ret) {
+			pr_err("%s: failed (%d) to set block-size to %d\n",
+			       __func__, ret, MMC_CARD_CMDQ_BLK_SIZE);
+			goto out;
+		}
+
+	} else {
+		if (!test_bit(CMDQ_STATE_HALT, &ctx->curr_state)) {
+			ret = mmc_cmdq_halt(host, true);
+			if (ret) {
+				pr_err("%s: halt: failed: %d\n",
+					mmc_hostname(host), ret);
+				goto out;
+			}
+		}
+	}
+
+	ret = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+			 EXT_CSD_CMDQ, enable,
+			 card->ext_csd.generic_cmd6_time);
+	if (ret) {
+		pr_err("%s: cmdq mode %sable failed %d\n",
+		       md->disk->disk_name, enable ? "en" : "dis", ret);
+		goto out;
+	}
+
+	if (enable)
+		mmc_card_set_cmdq(card);
+	else
+		mmc_card_clr_cmdq(card);
+out:
+	return ret;
+}
+
 static inline int mmc_blk_part_switch(struct mmc_card *card,
 				      struct mmc_blk_data *md)
 {
 	int ret;
 	struct mmc_blk_data *main_md = dev_get_drvdata(&card->dev);
 
-	if (main_md->part_curr == md->part_type)
+	if ((main_md->part_curr == md->part_type) &&
+	    (card->part_curr == md->part_type))
 		return 0;
 
 	if (mmc_card_mmc(card)) {
 		u8 part_config = card->ext_csd.part_config;
 
+		if (md->part_type) {
+			/* disable CQ mode for non-user data partitions */
+			ret = mmc_blk_cmdq_switch(card, md, false);
+			if (ret)
+				return ret;
+		}
+
 		if (md->part_type == EXT_CSD_PART_CONFIG_ACC_RPMB)
 			mmc_retune_pause(card->host);
 
@@ -1012,12 +1432,16 @@
 				 EXT_CSD_PART_CONFIG, part_config,
 				 card->ext_csd.part_time);
 		if (ret) {
+			pr_err("%s: mmc_blk_part_switch failure, %d -> %d\n",
+				mmc_hostname(card->host), main_md->part_curr,
+					md->part_type);
 			if (md->part_type == EXT_CSD_PART_CONFIG_ACC_RPMB)
 				mmc_retune_unpause(card->host);
 			return ret;
 		}
 
 		card->ext_csd.part_config = part_config;
+		card->part_curr = md->part_type;
 
 		if (main_md->part_curr == EXT_CSD_PART_CONFIG_ACC_RPMB)
 			mmc_retune_unpause(card->host);
@@ -1201,18 +1625,20 @@
 	switch (error) {
 	case -EILSEQ:
 		/* response crc error, retry the r/w cmd */
-		pr_err("%s: %s sending %s command, card status %#x\n",
-			req->rq_disk->disk_name, "response CRC error",
+		pr_err_ratelimited(
+			"%s: response CRC error sending %s command, card status %#x\n",
+			req->rq_disk->disk_name,
 			name, status);
 		return ERR_RETRY;
 
 	case -ETIMEDOUT:
-		pr_err("%s: %s sending %s command, card status %#x\n",
-			req->rq_disk->disk_name, "timed out", name, status);
+		pr_err_ratelimited(
+			"%s: timed out sending %s command, card status %#x\n",
+			req->rq_disk->disk_name, name, status);
 
 		/* If the status cmd initially failed, retry the r/w cmd */
 		if (!status_valid) {
-			pr_err("%s: status not valid, retrying timeout\n",
+			pr_err_ratelimited("%s: status not valid, retrying timeout\n",
 				req->rq_disk->disk_name);
 			return ERR_RETRY;
 		}
@@ -1223,17 +1649,22 @@
 		 * have corrected the state problem above.
 		 */
 		if (status & (R1_COM_CRC_ERROR | R1_ILLEGAL_COMMAND)) {
-			pr_err("%s: command error, retrying timeout\n",
+			pr_err_ratelimited(
+				"%s: command error, retrying timeout\n",
 				req->rq_disk->disk_name);
 			return ERR_RETRY;
 		}
 
 		/* Otherwise abort the command */
+		pr_err_ratelimited(
+			"%s: not retrying timeout\n",
+			req->rq_disk->disk_name);
 		return ERR_ABORT;
 
 	default:
 		/* We don't understand the error code the driver gave us */
-		pr_err("%s: unknown error %d sending read/write command, card status %#x\n",
+		pr_err_ratelimited(
+			"%s: unknown error %d sending read/write command, card status %#x\n",
 		       req->rq_disk->disk_name, error, status);
 		return ERR_ABORT;
 	}
@@ -1372,8 +1803,15 @@
 
 	md->reset_done |= type;
 	err = mmc_hw_reset(host);
+	if (err && err != -EOPNOTSUPP) {
+		/* We failed to reset so we need to abort the request */
+		pr_err("%s: %s: failed to reset %d\n", mmc_hostname(host),
+				__func__, err);
+		return -ENODEV;
+	}
+
 	/* Ensure we switch back to the correct partition */
-	if (err != -EOPNOTSUPP) {
+	if (host->card) {
 		struct mmc_blk_data *main_md =
 			dev_get_drvdata(&host->card->dev);
 		int part_err;
@@ -1408,6 +1846,77 @@
 	return false;
 }
 
+static struct mmc_cmdq_req *mmc_blk_cmdq_prep_discard_req(struct mmc_queue *mq,
+						struct request *req)
+{
+	struct mmc_blk_data *md = mq->data;
+	struct mmc_card *card = md->queue.card;
+	struct mmc_host *host = card->host;
+	struct mmc_cmdq_context_info *ctx_info = &host->cmdq_ctx;
+	struct mmc_cmdq_req *cmdq_req;
+	struct mmc_queue_req *active_mqrq;
+
+	BUG_ON(req->tag > card->ext_csd.cmdq_depth);
+	BUG_ON(test_and_set_bit(req->tag, &host->cmdq_ctx.active_reqs));
+
+	set_bit(CMDQ_STATE_DCMD_ACTIVE, &ctx_info->curr_state);
+
+	active_mqrq = &mq->mqrq_cmdq[req->tag];
+	active_mqrq->req = req;
+
+	cmdq_req = mmc_cmdq_prep_dcmd(active_mqrq, mq);
+	cmdq_req->cmdq_req_flags |= QBR;
+	cmdq_req->mrq.cmd = &cmdq_req->cmd;
+	cmdq_req->tag = req->tag;
+	return cmdq_req;
+}
+
+static int mmc_blk_cmdq_issue_discard_rq(struct mmc_queue *mq,
+					struct request *req)
+{
+	struct mmc_blk_data *md = mq->data;
+	struct mmc_card *card = md->queue.card;
+	struct mmc_cmdq_req *cmdq_req = NULL;
+	unsigned int from, nr, arg;
+	int err = 0;
+
+	if (!mmc_can_erase(card)) {
+		err = -EOPNOTSUPP;
+		blk_end_request(req, err, blk_rq_bytes(req));
+		goto out;
+	}
+
+	from = blk_rq_pos(req);
+	nr = blk_rq_sectors(req);
+
+	if (mmc_can_discard(card))
+		arg = MMC_DISCARD_ARG;
+	else if (mmc_can_trim(card))
+		arg = MMC_TRIM_ARG;
+	else
+		arg = MMC_ERASE_ARG;
+
+	cmdq_req = mmc_blk_cmdq_prep_discard_req(mq, req);
+	if (card->quirks & MMC_QUIRK_INAND_CMD38) {
+		__mmc_switch_cmdq_mode(cmdq_req->mrq.cmd,
+				EXT_CSD_CMD_SET_NORMAL,
+				INAND_CMD38_ARG_EXT_CSD,
+				arg == MMC_TRIM_ARG ?
+				INAND_CMD38_ARG_TRIM :
+				INAND_CMD38_ARG_ERASE,
+				0, true, false);
+		err = mmc_cmdq_wait_for_dcmd(card->host, cmdq_req);
+		if (err)
+			goto clear_dcmd;
+	}
+	err = mmc_cmdq_erase(cmdq_req, card, from, nr, arg);
+clear_dcmd:
+	mmc_host_clk_hold(card->host);
+	blk_complete_request(req);
+out:
+	return err ? 1 : 0;
+}
+
 static int mmc_blk_issue_discard_rq(struct mmc_queue *mq, struct request *req)
 {
 	struct mmc_blk_data *md = mq->data;
@@ -1451,6 +1960,69 @@
 	return err ? 0 : 1;
 }
 
+static int mmc_blk_cmdq_issue_secdiscard_rq(struct mmc_queue *mq,
+				       struct request *req)
+{
+	struct mmc_blk_data *md = mq->data;
+	struct mmc_card *card = md->queue.card;
+	struct mmc_cmdq_req *cmdq_req = NULL;
+	unsigned int from, nr, arg;
+	int err = 0;
+
+	if (!(mmc_can_secure_erase_trim(card))) {
+		err = -EOPNOTSUPP;
+		blk_end_request(req, err, blk_rq_bytes(req));
+		goto out;
+	}
+
+	from = blk_rq_pos(req);
+	nr = blk_rq_sectors(req);
+
+	if (mmc_can_trim(card) && !mmc_erase_group_aligned(card, from, nr))
+		arg = MMC_SECURE_TRIM1_ARG;
+	else
+		arg = MMC_SECURE_ERASE_ARG;
+
+	cmdq_req = mmc_blk_cmdq_prep_discard_req(mq, req);
+	if (card->quirks & MMC_QUIRK_INAND_CMD38) {
+		__mmc_switch_cmdq_mode(cmdq_req->mrq.cmd,
+				EXT_CSD_CMD_SET_NORMAL,
+				INAND_CMD38_ARG_EXT_CSD,
+				arg == MMC_SECURE_TRIM1_ARG ?
+				INAND_CMD38_ARG_SECTRIM1 :
+				INAND_CMD38_ARG_SECERASE,
+				0, true, false);
+		err = mmc_cmdq_wait_for_dcmd(card->host, cmdq_req);
+		if (err)
+			goto clear_dcmd;
+	}
+
+	err = mmc_cmdq_erase(cmdq_req, card, from, nr, arg);
+	if (err)
+		goto clear_dcmd;
+
+	if (arg == MMC_SECURE_TRIM1_ARG) {
+		if (card->quirks & MMC_QUIRK_INAND_CMD38) {
+			__mmc_switch_cmdq_mode(cmdq_req->mrq.cmd,
+					EXT_CSD_CMD_SET_NORMAL,
+					INAND_CMD38_ARG_EXT_CSD,
+					INAND_CMD38_ARG_SECTRIM2,
+					0, true, false);
+			err = mmc_cmdq_wait_for_dcmd(card->host, cmdq_req);
+			if (err)
+				goto clear_dcmd;
+		}
+
+		err = mmc_cmdq_erase(cmdq_req, card, from, nr,
+				MMC_SECURE_TRIM2_ARG);
+	}
+clear_dcmd:
+	mmc_host_clk_hold(card->host);
+	blk_complete_request(req);
+out:
+	return err ? 1 : 0;
+}
+
 static int mmc_blk_issue_secdiscard_rq(struct mmc_queue *mq,
 				       struct request *req)
 {
@@ -1524,9 +2096,45 @@
 	struct mmc_card *card = md->queue.card;
 	int ret = 0;
 
-	ret = mmc_flush_cache(card);
-	if (ret)
+	if (!req)
+		return 0;
+
+	if (req->cmd_flags & REQ_BARRIER) {
+		/*
+		 * If eMMC cache flush policy is set to 1, then the device
+		 * shall flush the requests in First-In-First-Out (FIFO) order.
+		 * In this case, as per spec, the host must not send any cache
+		 * barrier requests as they are redundant and add unnecessary
+		 * overhead to both device and host.
+		 */
+		if (card->ext_csd.cache_flush_policy & 1)
+			goto end_req;
+
+		/*
+		 * In case barrier is not supported or enabled in the device,
+		 * use flush as a fallback option.
+		 */
+		ret = mmc_cache_barrier(card);
+		if (ret)
+			ret = mmc_flush_cache(card);
+	} else if (req_op(req) == REQ_OP_FLUSH) {
+		ret = mmc_flush_cache(card);
+	}
+	if (ret == -ENODEV) {
+		pr_err("%s: %s: restart mmc card",
+				req->rq_disk->disk_name, __func__);
+		if (mmc_blk_reset(md, card->host, MMC_BLK_FLUSH))
+			pr_err("%s: %s: fail to restart mmc",
+				req->rq_disk->disk_name, __func__);
+		else
+			mmc_blk_reset_success(md, MMC_BLK_FLUSH);
+	}
+
+	if (ret) {
+		pr_err("%s: %s: notify flush error to upper layers",
+				req->rq_disk->disk_name, __func__);
 		ret = -EIO;
+	}
 
 #ifdef CONFIG_MMC_SIMULATE_MAX_SPEED
 	else if (atomic_read(&mq->cache_size)) {
@@ -1545,6 +2153,7 @@
 		}
 	}
 #endif
+end_req:
 	blk_end_request_all(req, ret);
 
 	return ret ? 0 : 1;
@@ -1773,6 +2382,7 @@
 	brq->stop.arg = 0;
 	brq->data.blocks = blk_rq_sectors(req);
 
+	brq->data.fault_injected = false;
 	/*
 	 * The block layer doesn't support all sector count
 	 * restrictions, so we need to be prepared for too big
@@ -1896,6 +2506,7 @@
 	}
 
 	mqrq->mmc_active.mrq = &brq->mrq;
+	mqrq->mmc_active.mrq->req = mqrq->req;
 	mqrq->mmc_active.err_check = mmc_blk_err_check;
 
 	mmc_queue_bounce_pre(mqrq);
@@ -1917,6 +2528,178 @@
 	return nr_segs;
 }
 
+/**
+ * mmc_blk_disable_wr_packing() - disables packing mode
+ * @mq:	MMC queue.
+ *
+ */
+void mmc_blk_disable_wr_packing(struct mmc_queue *mq)
+{
+	if (mq) {
+		mq->wr_packing_enabled = false;
+		mq->num_of_potential_packed_wr_reqs = 0;
+	}
+}
+EXPORT_SYMBOL(mmc_blk_disable_wr_packing);
+
+static int get_packed_trigger(int potential, struct mmc_card *card,
+			      struct request *req, int curr_trigger)
+{
+	static int num_mean_elements = 1;
+	static unsigned long mean_potential = PCKD_TRGR_INIT_MEAN_POTEN;
+	unsigned int trigger = curr_trigger;
+	unsigned int pckd_trgr_upper_bound = card->ext_csd.max_packed_writes;
+
+	/* scale down the upper bound to 75% */
+	pckd_trgr_upper_bound = (pckd_trgr_upper_bound * 3) / 4;
+
+	/*
+	 * since the most common calls for this function are with small
+	 * potential write values and since we don't want these calls to affect
+	 * the packed trigger, set a lower bound and ignore calls with
+	 * potential lower than that bound
+	 */
+	if (potential <= PCKD_TRGR_POTEN_LOWER_BOUND)
+		return trigger;
+
+	/*
+	 * this is to prevent integer overflow in the following calculation:
+	 * once every PACKED_TRIGGER_MAX_ELEMENTS reset the algorithm
+	 */
+	if (num_mean_elements > PACKED_TRIGGER_MAX_ELEMENTS) {
+		num_mean_elements = 1;
+		mean_potential = PCKD_TRGR_INIT_MEAN_POTEN;
+	}
+
+	/*
+	 * get next mean value based on previous mean value and current
+	 * potential packed writes. Calculation is as follows:
+	 * mean_pot[i+1] =
+	 *	((mean_pot[i] * num_mean_elem) + potential)/(num_mean_elem + 1)
+	 */
+	mean_potential *= num_mean_elements;
+	/*
+	 * add num_mean_elements so that the division of two integers doesn't
+	 * lower mean_potential too much
+	 */
+	if (potential > mean_potential)
+		mean_potential += num_mean_elements;
+	mean_potential += potential;
+	/* this is for gaining more precision when dividing two integers */
+	mean_potential *= PCKD_TRGR_PRECISION_MULTIPLIER;
+	/* this completes the mean calculation */
+	mean_potential /= ++num_mean_elements;
+	mean_potential /= PCKD_TRGR_PRECISION_MULTIPLIER;
+
+	/*
+	 * if current potential packed writes is greater than the mean potential
+	 * then the heuristic is that the following workload will contain many
+	 * write requests, therefore we lower the packed trigger. In the
+	 * opposite case we want to increase the trigger in order to get less
+	 * packing events.
+	 */
+	if (potential >= mean_potential)
+		trigger = (trigger <= PCKD_TRGR_LOWER_BOUND) ?
+				PCKD_TRGR_LOWER_BOUND : trigger - 1;
+	else
+		trigger = (trigger >= pckd_trgr_upper_bound) ?
+				pckd_trgr_upper_bound : trigger + 1;
+
+	/*
+	 * an urgent read request indicates a packed list being interrupted
+	 * by this read, therefore we aim for less packing, hence the trigger
+	 * gets increased
+	 */
+	if (req && (req->cmd_flags & REQ_URGENT) && (rq_data_dir(req) == READ))
+		trigger += PCKD_TRGR_URGENT_PENALTY;
+
+	return trigger;
+}
+
+static void mmc_blk_write_packing_control(struct mmc_queue *mq,
+					  struct request *req)
+{
+	struct mmc_host *host = mq->card->host;
+	int data_dir;
+
+	if (!(host->caps2 & MMC_CAP2_PACKED_WR))
+		return;
+
+	/* Support for the write packing on eMMC 4.5 or later */
+	if (mq->card->ext_csd.rev <= 5)
+		return;
+
+	/*
+	 * In case the packing control is not supported by the host, it should
+	 * not have an effect on the write packing. Therefore we have to enable
+	 * the write packing
+	 */
+	if (!(host->caps2 & MMC_CAP2_PACKED_WR_CONTROL)) {
+		mq->wr_packing_enabled = true;
+		return;
+	}
+
+	if (!req || (req && (req->cmd_flags & REQ_PREFLUSH))) {
+		if (mq->num_of_potential_packed_wr_reqs >
+				mq->num_wr_reqs_to_start_packing)
+			mq->wr_packing_enabled = true;
+		mq->num_wr_reqs_to_start_packing =
+			get_packed_trigger(mq->num_of_potential_packed_wr_reqs,
+					   mq->card, req,
+					   mq->num_wr_reqs_to_start_packing);
+		mq->num_of_potential_packed_wr_reqs = 0;
+		return;
+	}
+
+	data_dir = rq_data_dir(req);
+
+	if (data_dir == READ) {
+		mmc_blk_disable_wr_packing(mq);
+		mq->num_wr_reqs_to_start_packing =
+			get_packed_trigger(mq->num_of_potential_packed_wr_reqs,
+					   mq->card, req,
+					   mq->num_wr_reqs_to_start_packing);
+		mq->num_of_potential_packed_wr_reqs = 0;
+		mq->wr_packing_enabled = false;
+		return;
+	} else if (data_dir == WRITE) {
+		mq->num_of_potential_packed_wr_reqs++;
+	}
+
+	if (mq->num_of_potential_packed_wr_reqs >
+			mq->num_wr_reqs_to_start_packing)
+		mq->wr_packing_enabled = true;
+}
+
+struct mmc_wr_pack_stats *mmc_blk_get_packed_statistics(struct mmc_card *card)
+{
+	if (!card)
+		return NULL;
+
+	return &card->wr_pack_stats;
+}
+EXPORT_SYMBOL(mmc_blk_get_packed_statistics);
+
+void mmc_blk_init_packed_statistics(struct mmc_card *card)
+{
+	int max_num_of_packed_reqs = 0;
+
+	if (!card || !card->wr_pack_stats.packing_events)
+		return;
+
+	max_num_of_packed_reqs = card->ext_csd.max_packed_writes;
+
+	spin_lock(&card->wr_pack_stats.lock);
+	memset(card->wr_pack_stats.packing_events, 0,
+		(max_num_of_packed_reqs + 1) *
+	       sizeof(*card->wr_pack_stats.packing_events));
+	memset(&card->wr_pack_stats.pack_stop_reason, 0,
+		sizeof(card->wr_pack_stats.pack_stop_reason));
+	card->wr_pack_stats.enabled = true;
+	spin_unlock(&card->wr_pack_stats.lock);
+}
+EXPORT_SYMBOL(mmc_blk_init_packed_statistics);
+
 static u8 mmc_blk_prep_packed_list(struct mmc_queue *mq, struct request *req)
 {
 	struct request_queue *q = mq->queue;
@@ -1930,6 +2713,7 @@
 	bool put_back = true;
 	u8 max_packed_rw = 0;
 	u8 reqs = 0;
+	struct mmc_wr_pack_stats *stats = &card->wr_pack_stats;
 
 	/*
 	 * We don't need to check packed for any further
@@ -1946,6 +2730,9 @@
 	if (!(md->flags & MMC_BLK_PACKED_CMD))
 		goto no_packed;
 
+	if (!mq->wr_packing_enabled)
+		goto no_packed;
+
 	if ((rq_data_dir(cur) == WRITE) &&
 	    mmc_host_packed_wr(card->host))
 		max_packed_rw = card->ext_csd.max_packed_writes;
@@ -1961,6 +2748,9 @@
 	    !IS_ALIGNED(blk_rq_sectors(cur), 8))
 		goto no_packed;
 
+	if (cur->cmd_flags & REQ_FUA)
+		goto no_packed;
+
 	mmc_blk_clear_packed(mqrq);
 
 	max_blk_count = min(card->host->max_blk_count,
@@ -1977,6 +2767,7 @@
 		phys_segments += mmc_calc_packed_hdr_segs(q, card);
 	}
 
+	spin_lock(&stats->lock);
 	do {
 		if (reqs >= max_packed_rw - 1) {
 			put_back = false;
@@ -1987,34 +2778,65 @@
 		next = blk_fetch_request(q);
 		spin_unlock_irq(q->queue_lock);
 		if (!next) {
+			MMC_BLK_UPDATE_STOP_REASON(stats, EMPTY_QUEUE);
 			put_back = false;
 			break;
 		}
 
 		if (mmc_large_sector(card) &&
-		    !IS_ALIGNED(blk_rq_sectors(next), 8))
+		    !IS_ALIGNED(blk_rq_sectors(next), 8)) {
+			MMC_BLK_UPDATE_STOP_REASON(stats, LARGE_SEC_ALIGN);
 			break;
+		}
 
 		if (req_op(next) == REQ_OP_DISCARD ||
 		    req_op(next) == REQ_OP_SECURE_ERASE ||
-		    req_op(next) == REQ_OP_FLUSH)
+		    req_op(next) == REQ_OP_FLUSH) {
+			if (req_op(next) != REQ_OP_SECURE_ERASE)
+				MMC_BLK_UPDATE_STOP_REASON(stats, FLUSH_OR_DISCARD);
 			break;
+		}
 
-		if (rq_data_dir(cur) != rq_data_dir(next))
+		if (next->cmd_flags & REQ_FUA) {
+			MMC_BLK_UPDATE_STOP_REASON(stats, FUA);
 			break;
+		}
+
+		if (rq_data_dir(cur) != rq_data_dir(next)) {
+			MMC_BLK_UPDATE_STOP_REASON(stats, WRONG_DATA_DIR);
+			break;
+		}
 
 		if (mmc_req_rel_wr(next) &&
-		    (md->flags & MMC_BLK_REL_WR) && !en_rel_wr)
+		    (md->flags & MMC_BLK_REL_WR) && !en_rel_wr) {
+			MMC_BLK_UPDATE_STOP_REASON(stats, REL_WRITE);
 			break;
+		}
 
 		req_sectors += blk_rq_sectors(next);
-		if (req_sectors > max_blk_count)
+		if (req_sectors > max_blk_count) {
+			if (stats->enabled)
+				stats->pack_stop_reason[EXCEEDS_SECTORS]++;
 			break;
+		}
 
 		phys_segments +=  next->nr_phys_segments;
-		if (phys_segments > max_phys_segs)
+		if (phys_segments > max_phys_segs) {
+			MMC_BLK_UPDATE_STOP_REASON(stats, EXCEEDS_SEGMENTS);
 			break;
+		}
 
+		if (mq->no_pack_for_random) {
+			if ((blk_rq_pos(cur) + blk_rq_sectors(cur)) !=
+			    blk_rq_pos(next)) {
+				MMC_BLK_UPDATE_STOP_REASON(stats, RANDOM);
+				put_back = 1;
+				break;
+			}
+		}
+
+		if (rq_data_dir(next) == WRITE)
+			mq->num_of_potential_packed_wr_reqs++;
 		list_add_tail(&next->queuelist, &mqrq->packed->list);
 		cur = next;
 		reqs++;
@@ -2026,6 +2848,15 @@
 		spin_unlock_irq(q->queue_lock);
 	}
 
+	if (stats->enabled) {
+		if (reqs + 1 <= card->ext_csd.max_packed_writes)
+			stats->packing_events[reqs + 1]++;
+		if (reqs + 1 == max_packed_rw)
+			MMC_BLK_UPDATE_STOP_REASON(stats, THRESHOLD);
+	}
+
+	spin_unlock(&stats->lock);
+
 	if (reqs > 0) {
 		list_add(&req->queuelist, &mqrq->packed->list);
 		mqrq->packed->nr_entries = ++reqs;
@@ -2103,6 +2934,7 @@
 	brq->data.blksz = 512;
 	brq->data.blocks = packed->blocks + hdr_blocks;
 	brq->data.flags = MMC_DATA_WRITE;
+	brq->data.fault_injected = false;
 
 	brq->stop.opcode = MMC_STOP_TRANSMISSION;
 	brq->stop.arg = 0;
@@ -2114,7 +2946,18 @@
 	brq->data.sg_len = mmc_queue_map_sg(mq, mqrq);
 
 	mqrq->mmc_active.mrq = &brq->mrq;
-	mqrq->mmc_active.err_check = mmc_blk_packed_err_check;
+
+	/*
+	 * This is intended for packed commands tests usage - in case these
+	 * functions are not in use the respective pointers are NULL
+	 */
+	if (mq->err_check_fn)
+		mqrq->mmc_active.err_check = mq->err_check_fn;
+	else
+		mqrq->mmc_active.err_check = mmc_blk_packed_err_check;
+
+	if (mq->packed_test_fn)
+		mq->packed_test_fn(mq->queue, mqrq);
 
 	mmc_queue_bounce_pre(mqrq);
 }
@@ -2136,11 +2979,12 @@
 	 */
 	if (mmc_card_sd(card)) {
 		u32 blocks;
-
-		blocks = mmc_sd_num_wr_blocks(card);
-		if (blocks != (u32)-1) {
-			ret = blk_end_request(req, 0, blocks << 9);
-		}
+		if (!brq->data.fault_injected) {
+			blocks = mmc_sd_num_wr_blocks(card);
+			if (blocks != (u32)-1)
+				ret = blk_end_request(req, 0, blocks << 9);
+		} else
+			ret = blk_end_request(req, 0, brq->data.bytes_xfered);
 	} else {
 		if (!mmc_packed_cmd(mq_rq->cmd_type))
 			ret = blk_end_request(req, 0, brq->data.bytes_xfered);
@@ -2214,6 +3058,573 @@
 	mmc_blk_clear_packed(mq_rq);
 }
 
+static int mmc_blk_cmdq_start_req(struct mmc_host *host,
+				  struct mmc_cmdq_req *cmdq_req)
+{
+	struct mmc_request *mrq = &cmdq_req->mrq;
+
+	mrq->done = mmc_blk_cmdq_req_done;
+	return mmc_cmdq_start_req(host, cmdq_req);
+}
+
+/* prepare for non-data commands */
+static struct mmc_cmdq_req *mmc_cmdq_prep_dcmd(
+		struct mmc_queue_req *mqrq, struct mmc_queue *mq)
+{
+	struct request *req = mqrq->req;
+	struct mmc_cmdq_req *cmdq_req = &mqrq->cmdq_req;
+
+	memset(&mqrq->cmdq_req, 0, sizeof(struct mmc_cmdq_req));
+
+	cmdq_req->mrq.data = NULL;
+	cmdq_req->cmd_flags = req->cmd_flags;
+	cmdq_req->mrq.req = mqrq->req;
+	req->special = mqrq;
+	cmdq_req->cmdq_req_flags |= DCMD;
+	cmdq_req->mrq.cmdq_req = cmdq_req;
+
+	return &mqrq->cmdq_req;
+}
+
+
+#define IS_RT_CLASS_REQ(x)     \
+	(IOPRIO_PRIO_CLASS(req_get_ioprio(x)) == IOPRIO_CLASS_RT)
+
+static struct mmc_cmdq_req *mmc_blk_cmdq_rw_prep(
+		struct mmc_queue_req *mqrq, struct mmc_queue *mq)
+{
+	struct mmc_card *card = mq->card;
+	struct request *req = mqrq->req;
+	struct mmc_blk_data *md = mq->data;
+	bool do_rel_wr = mmc_req_rel_wr(req) && (md->flags & MMC_BLK_REL_WR);
+	bool do_data_tag;
+	bool read_dir = (rq_data_dir(req) == READ);
+	bool prio = IS_RT_CLASS_REQ(req);
+	struct mmc_cmdq_req *cmdq_rq = &mqrq->cmdq_req;
+
+	memset(&mqrq->cmdq_req, 0, sizeof(struct mmc_cmdq_req));
+
+	cmdq_rq->tag = req->tag;
+	if (read_dir) {
+		cmdq_rq->cmdq_req_flags |= DIR;
+		cmdq_rq->data.flags = MMC_DATA_READ;
+	} else {
+		cmdq_rq->data.flags = MMC_DATA_WRITE;
+	}
+	if (prio)
+		cmdq_rq->cmdq_req_flags |= PRIO;
+
+	if (do_rel_wr)
+		cmdq_rq->cmdq_req_flags |= REL_WR;
+
+	cmdq_rq->data.blocks = blk_rq_sectors(req);
+	cmdq_rq->blk_addr = blk_rq_pos(req);
+	cmdq_rq->data.blksz = MMC_CARD_CMDQ_BLK_SIZE;
+
+	mmc_set_data_timeout(&cmdq_rq->data, card);
+
+	do_data_tag = (card->ext_csd.data_tag_unit_size) &&
+		(req->cmd_flags & REQ_META) &&
+		(rq_data_dir(req) == WRITE) &&
+		((cmdq_rq->data.blocks * cmdq_rq->data.blksz) >=
+		 card->ext_csd.data_tag_unit_size);
+	if (do_data_tag)
+		cmdq_rq->cmdq_req_flags |= DAT_TAG;
+	cmdq_rq->data.sg = mqrq->sg;
+	cmdq_rq->data.sg_len = mmc_queue_map_sg(mq, mqrq);
+
+	/*
+	 * Adjust the sg list so it is the same size as the
+	 * request.
+	 */
+	if (cmdq_rq->data.blocks > card->host->max_blk_count)
+		cmdq_rq->data.blocks = card->host->max_blk_count;
+
+	if (cmdq_rq->data.blocks != blk_rq_sectors(req)) {
+		int i, data_size = cmdq_rq->data.blocks << 9;
+		struct scatterlist *sg;
+
+		for_each_sg(cmdq_rq->data.sg, sg, cmdq_rq->data.sg_len, i) {
+			data_size -= sg->length;
+			if (data_size <= 0) {
+				sg->length += data_size;
+				i++;
+				break;
+			}
+		}
+		cmdq_rq->data.sg_len = i;
+	}
+
+	mqrq->cmdq_req.cmd_flags = req->cmd_flags;
+	mqrq->cmdq_req.mrq.req = mqrq->req;
+	mqrq->cmdq_req.mrq.cmdq_req = &mqrq->cmdq_req;
+	mqrq->cmdq_req.mrq.data = &mqrq->cmdq_req.data;
+	mqrq->req->special = mqrq;
+
+	pr_debug("%s: %s: mrq: 0x%p req: 0x%p mqrq: 0x%p bytes to xf: %d mmc_cmdq_req: 0x%p card-addr: 0x%08x dir(r-1/w-0): %d\n",
+		 mmc_hostname(card->host), __func__, &mqrq->cmdq_req.mrq,
+		 mqrq->req, mqrq, (cmdq_rq->data.blocks * cmdq_rq->data.blksz),
+		 cmdq_rq, cmdq_rq->blk_addr,
+		 (cmdq_rq->cmdq_req_flags & DIR) ? 1 : 0);
+
+	return &mqrq->cmdq_req;
+}
+
+static int mmc_blk_cmdq_issue_rw_rq(struct mmc_queue *mq, struct request *req)
+{
+	struct mmc_queue_req *active_mqrq;
+	struct mmc_card *card = mq->card;
+	struct mmc_host *host = card->host;
+	struct mmc_cmdq_context_info *ctx = &host->cmdq_ctx;
+	struct mmc_cmdq_req *mc_rq;
+	u8 active_small_sector_read = 0;
+	int ret = 0;
+
+	mmc_deferred_scaling(host);
+	mmc_cmdq_clk_scaling_start_busy(host, true);
+
+	BUG_ON((req->tag < 0) || (req->tag > card->ext_csd.cmdq_depth));
+	BUG_ON(test_and_set_bit(req->tag, &host->cmdq_ctx.data_active_reqs));
+	BUG_ON(test_and_set_bit(req->tag, &host->cmdq_ctx.active_reqs));
+
+	active_mqrq = &mq->mqrq_cmdq[req->tag];
+	active_mqrq->req = req;
+
+	mc_rq = mmc_blk_cmdq_rw_prep(active_mqrq, mq);
+
+	if (card->quirks & MMC_QUIRK_CMDQ_EMPTY_BEFORE_DCMD) {
+		unsigned int sectors = blk_rq_sectors(req);
+
+		if (((sectors > 0) && (sectors < 8))
+		    && (rq_data_dir(req) == READ))
+			active_small_sector_read = 1;
+	}
+	ret = mmc_blk_cmdq_start_req(card->host, mc_rq);
+	if (!ret && active_small_sector_read)
+		host->cmdq_ctx.active_small_sector_read_reqs++;
+	/*
+	 * When in SVS2 on low load scenario and there are lots of requests
+	 * queued for CMDQ we need to wait till the queue is empty to scale
+	 * back up to Nominal even if there is a sudden increase in load.
+	 * This impacts performance where lots of IO get executed in SVS2
+	 * frequency since the queue is full. As SVS2 is a low load use case
+	 * we can serialize the requests and not queue them in parallel
+	 * without impacting other use cases. This makes sure the queue gets
+	 * empty faster and we will be able to scale up to Nominal frequency
+	 * when needed.
+	 */
+	if (!ret && (host->clk_scaling.state == MMC_LOAD_LOW))
+		wait_event_interruptible(ctx->queue_empty_wq,
+					(!ctx->active_reqs));
+
+	return ret;
+}
+
+/*
+ * Issues a flush (dcmd) request
+ */
+int mmc_blk_cmdq_issue_flush_rq(struct mmc_queue *mq, struct request *req)
+{
+	int err;
+	struct mmc_queue_req *active_mqrq;
+	struct mmc_card *card = mq->card;
+	struct mmc_host *host;
+	struct mmc_cmdq_req *cmdq_req;
+	struct mmc_cmdq_context_info *ctx_info;
+
+	BUG_ON(!card);
+	host = card->host;
+	BUG_ON(!host);
+	BUG_ON(req->tag > card->ext_csd.cmdq_depth);
+	BUG_ON(test_and_set_bit(req->tag, &host->cmdq_ctx.active_reqs));
+
+	ctx_info = &host->cmdq_ctx;
+
+	set_bit(CMDQ_STATE_DCMD_ACTIVE, &ctx_info->curr_state);
+
+	active_mqrq = &mq->mqrq_cmdq[req->tag];
+	active_mqrq->req = req;
+
+	cmdq_req = mmc_cmdq_prep_dcmd(active_mqrq, mq);
+	cmdq_req->cmdq_req_flags |= QBR;
+	cmdq_req->mrq.cmd = &cmdq_req->cmd;
+	cmdq_req->tag = req->tag;
+
+	err = mmc_cmdq_prepare_flush(cmdq_req->mrq.cmd);
+	if (err) {
+		pr_err("%s: failed (%d) preparing flush req\n",
+		       mmc_hostname(host), err);
+		return err;
+	}
+	err = mmc_blk_cmdq_start_req(card->host, cmdq_req);
+	return err;
+}
+EXPORT_SYMBOL(mmc_blk_cmdq_issue_flush_rq);
+
+static void mmc_blk_cmdq_reset(struct mmc_host *host, bool clear_all)
+{
+	int err = 0;
+
+	if (mmc_cmdq_halt(host, true)) {
+		pr_err("%s: halt failed\n", mmc_hostname(host));
+		goto reset;
+	}
+
+	if (clear_all)
+		mmc_cmdq_discard_queue(host, 0);
+reset:
+	mmc_host_clk_hold(host);
+	host->cmdq_ops->disable(host, true);
+	mmc_host_clk_release(host);
+	err = mmc_cmdq_hw_reset(host);
+	if (err && err != -EOPNOTSUPP) {
+		pr_err("%s: failed to cmdq_hw_reset err = %d\n",
+				mmc_hostname(host), err);
+		mmc_host_clk_hold(host);
+		host->cmdq_ops->enable(host);
+		mmc_host_clk_release(host);
+		mmc_cmdq_halt(host, false);
+		goto out;
+	}
+	/*
+	 * CMDQ HW reset would have already made CQE
+	 * in unhalted state, but reflect the same
+	 * in software state of cmdq_ctx.
+	 */
+	mmc_host_clr_halt(host);
+out:
+	return;
+}
+
+/**
+ * is_cmdq_dcmd_req - Checks if tag belongs to DCMD request.
+ * @q:		request_queue pointer.
+ * @tag:	tag number of request to check.
+ *
+ * This function checks if the request with tag number "tag"
+ * is a DCMD request or not based on cmdq_req_flags set.
+ *
+ * returns true if DCMD req, otherwise false.
+ */
+static bool is_cmdq_dcmd_req(struct request_queue *q, int tag)
+{
+	struct request *req;
+	struct mmc_queue_req *mq_rq;
+	struct mmc_cmdq_req *cmdq_req;
+
+	req = blk_queue_find_tag(q, tag);
+	if (WARN_ON(!req))
+		goto out;
+	mq_rq = req->special;
+	if (WARN_ON(!mq_rq))
+		goto out;
+	cmdq_req = &(mq_rq->cmdq_req);
+	return (cmdq_req->cmdq_req_flags & DCMD);
+out:
+	return -ENOENT;
+}
+
+/**
+ * mmc_blk_cmdq_reset_all - Reset everything for CMDQ block request.
+ * @host:	mmc_host pointer.
+ * @err:	error for which reset is performed.
+ *
+ * This function implements reset_all functionality for
+ * cmdq. It resets the controller, power cycle the card,
+ * and invalidate all busy tags(requeue all request back to
+ * elevator).
+ */
+static void mmc_blk_cmdq_reset_all(struct mmc_host *host, int err)
+{
+	struct mmc_request *mrq = host->err_mrq;
+	struct mmc_card *card = host->card;
+	struct mmc_cmdq_context_info *ctx_info = &host->cmdq_ctx;
+	struct request_queue *q;
+	int itag = 0;
+	int ret = 0;
+
+	if (WARN_ON(!mrq))
+		return;
+
+	q = mrq->req->q;
+	WARN_ON(!test_bit(CMDQ_STATE_ERR, &ctx_info->curr_state));
+
+	#ifdef CONFIG_MMC_CLKGATE
+	pr_debug("%s: %s: active_reqs = %lu, clk_requests = %d\n",
+			mmc_hostname(host), __func__,
+			ctx_info->active_reqs, host->clk_requests);
+	#endif
+
+	mmc_blk_cmdq_reset(host, false);
+
+	for_each_set_bit(itag, &ctx_info->active_reqs,
+			host->num_cq_slots) {
+		ret = is_cmdq_dcmd_req(q, itag);
+		if (WARN_ON(ret == -ENOENT))
+			continue;
+		if (!ret) {
+			WARN_ON(!test_and_clear_bit(itag,
+				 &ctx_info->data_active_reqs));
+			mmc_cmdq_post_req(host, itag, err);
+		} else {
+			clear_bit(CMDQ_STATE_DCMD_ACTIVE,
+					&ctx_info->curr_state);
+		}
+		WARN_ON(!test_and_clear_bit(itag,
+					&ctx_info->active_reqs));
+		mmc_host_clk_release(host);
+		mmc_put_card(card);
+	}
+
+	spin_lock_irq(q->queue_lock);
+	blk_queue_invalidate_tags(q);
+	spin_unlock_irq(q->queue_lock);
+}
+
+static void mmc_blk_cmdq_shutdown(struct mmc_queue *mq)
+{
+	int err;
+	struct mmc_card *card = mq->card;
+	struct mmc_host *host = card->host;
+
+	mmc_get_card(card);
+	mmc_host_clk_hold(host);
+	err = mmc_cmdq_halt(host, true);
+	if (err) {
+		pr_err("%s: halt: failed: %d\n", __func__, err);
+		goto out;
+	}
+
+	/* disable CQ mode in card */
+	if (mmc_card_cmdq(card)) {
+		err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+				 EXT_CSD_CMDQ, 0,
+				 card->ext_csd.generic_cmd6_time);
+		if (err) {
+			pr_err("%s: failed to switch card to legacy mode: %d\n",
+			       __func__, err);
+			goto out;
+		}
+		mmc_card_clr_cmdq(card);
+	}
+	host->cmdq_ops->disable(host, false);
+	host->card->cmdq_init = false;
+out:
+	mmc_host_clk_release(host);
+	mmc_put_card(card);
+}
+
+static enum blk_eh_timer_return mmc_blk_cmdq_req_timed_out(struct request *req)
+{
+	struct mmc_queue *mq = req->q->queuedata;
+	struct mmc_host *host = mq->card->host;
+	struct mmc_queue_req *mq_rq = req->special;
+	struct mmc_request *mrq;
+	struct mmc_cmdq_req *cmdq_req;
+	struct mmc_cmdq_context_info *ctx_info = &host->cmdq_ctx;
+
+	BUG_ON(!host);
+
+	/*
+	 * The mmc_queue_req will be present only if the request
+	 * is issued to the LLD. The request could be fetched from
+	 * block layer queue but could be waiting to be issued
+	 * (for e.g. clock scaling is waiting for an empty cmdq queue)
+	 * Reset the timer in such cases to give LLD more time
+	 */
+	if (!mq_rq) {
+		pr_warn("%s: restart timer for tag: %d\n", __func__, req->tag);
+		return BLK_EH_RESET_TIMER;
+	}
+
+	mrq = &mq_rq->cmdq_req.mrq;
+	cmdq_req = &mq_rq->cmdq_req;
+
+	BUG_ON(!mrq || !cmdq_req);
+
+	if (cmdq_req->cmdq_req_flags & DCMD)
+		mrq->cmd->error = -ETIMEDOUT;
+	else
+		mrq->data->error = -ETIMEDOUT;
+
+	if (mrq->cmd && mrq->cmd->error) {
+		if (!(mrq->req->cmd_flags & REQ_PREFLUSH)) {
+			/*
+			 * Notify completion for non flush commands like
+			 * discard that wait for DCMD finish.
+			 */
+			set_bit(CMDQ_STATE_REQ_TIMED_OUT,
+					&ctx_info->curr_state);
+			complete(&mrq->completion);
+			return BLK_EH_NOT_HANDLED;
+		}
+	}
+
+	if (test_bit(CMDQ_STATE_REQ_TIMED_OUT, &ctx_info->curr_state) ||
+		test_bit(CMDQ_STATE_ERR, &ctx_info->curr_state))
+		return BLK_EH_NOT_HANDLED;
+
+	set_bit(CMDQ_STATE_REQ_TIMED_OUT, &ctx_info->curr_state);
+	return BLK_EH_HANDLED;
+}
+
+/*
+ * mmc_blk_cmdq_err: error handling of cmdq error requests.
+ * Function should be called in context of error out request
+ * which has claim_host and rpm acquired.
+ * This may be called with CQ engine halted. Make sure to
+ * unhalt it after error recovery.
+ *
+ * TODO: Currently cmdq error handler does reset_all in case
+ * of any erorr. Need to optimize error handling.
+ */
+static void mmc_blk_cmdq_err(struct mmc_queue *mq)
+{
+	struct mmc_host *host = mq->card->host;
+	struct mmc_request *mrq = host->err_mrq;
+	struct mmc_cmdq_context_info *ctx_info = &host->cmdq_ctx;
+	struct request_queue *q;
+	int err, ret;
+	u32 status = 0;
+
+	mmc_host_clk_hold(host);
+	host->cmdq_ops->dumpstate(host);
+	mmc_host_clk_release(host);
+
+	if (WARN_ON(!mrq))
+		return;
+
+	q = mrq->req->q;
+	err = mmc_cmdq_halt(host, true);
+	if (err) {
+		pr_err("halt: failed: %d\n", err);
+		goto reset;
+	}
+
+	/* RED error - Fatal: requires reset */
+	if (mrq->cmdq_req->resp_err) {
+		err = mrq->cmdq_req->resp_err;
+		if (mmc_host_halt(host) || mmc_host_cq_disable(host)) {
+			ret = get_card_status(host->card, &status, 0);
+			if (ret)
+				pr_err("%s: CMD13 failed with err %d\n",
+						mmc_hostname(host), ret);
+		}
+		pr_err("%s: Response error detected with device status 0x%08x\n",
+			mmc_hostname(host), status);
+		goto reset;
+	}
+
+	/*
+	 * In case of software request time-out, we schedule err work only for
+	 * the first error out request and handles all other request in flight
+	 * here.
+	 */
+	if (test_bit(CMDQ_STATE_REQ_TIMED_OUT, &ctx_info->curr_state)) {
+		err = -ETIMEDOUT;
+	} else if (mrq->data && mrq->data->error) {
+		err = mrq->data->error;
+	} else if (mrq->cmd && mrq->cmd->error) {
+		/* DCMD commands */
+		err = mrq->cmd->error;
+	}
+
+reset:
+	mmc_blk_cmdq_reset_all(host, err);
+	if (mrq->cmdq_req->resp_err)
+		mrq->cmdq_req->resp_err = false;
+	mmc_cmdq_halt(host, false);
+
+	host->err_mrq = NULL;
+	clear_bit(CMDQ_STATE_REQ_TIMED_OUT, &ctx_info->curr_state);
+	WARN_ON(!test_and_clear_bit(CMDQ_STATE_ERR, &ctx_info->curr_state));
+	wake_up(&ctx_info->wait);
+}
+
+/* invoked by block layer in softirq context */
+void mmc_blk_cmdq_complete_rq(struct request *rq)
+{
+	struct mmc_queue_req *mq_rq = rq->special;
+	struct mmc_request *mrq = &mq_rq->cmdq_req.mrq;
+	struct mmc_host *host = mrq->host;
+	struct mmc_cmdq_context_info *ctx_info = &host->cmdq_ctx;
+	struct mmc_cmdq_req *cmdq_req = &mq_rq->cmdq_req;
+	struct mmc_queue *mq = (struct mmc_queue *)rq->q->queuedata;
+	int err = 0;
+	bool is_dcmd = false;
+
+	if (mrq->cmd && mrq->cmd->error)
+		err = mrq->cmd->error;
+	else if (mrq->data && mrq->data->error)
+		err = mrq->data->error;
+
+	if (err || cmdq_req->resp_err) {
+		pr_err("%s: %s: txfr error(%d)/resp_err(%d)\n",
+				mmc_hostname(mrq->host), __func__, err,
+				cmdq_req->resp_err);
+		if (test_bit(CMDQ_STATE_ERR, &ctx_info->curr_state)) {
+			pr_err("%s: CQ in error state, ending current req: %d\n",
+				__func__, err);
+		} else {
+			set_bit(CMDQ_STATE_ERR, &ctx_info->curr_state);
+			BUG_ON(host->err_mrq != NULL);
+			host->err_mrq = mrq;
+			schedule_work(&mq->cmdq_err_work);
+		}
+		goto out;
+	}
+	/*
+	 * In case of error CMDQ is expected to be either in halted
+	 * or disable state so cannot receive any completion of
+	 * other requests.
+	 */
+	BUG_ON(test_bit(CMDQ_STATE_ERR, &ctx_info->curr_state));
+
+	/* clear pending request */
+	BUG_ON(!test_and_clear_bit(cmdq_req->tag,
+				   &ctx_info->active_reqs));
+	if (cmdq_req->cmdq_req_flags & DCMD)
+		is_dcmd = true;
+	else
+		BUG_ON(!test_and_clear_bit(cmdq_req->tag,
+					 &ctx_info->data_active_reqs));
+	if (!is_dcmd)
+		mmc_cmdq_post_req(host, cmdq_req->tag, err);
+	if (cmdq_req->cmdq_req_flags & DCMD) {
+		clear_bit(CMDQ_STATE_DCMD_ACTIVE, &ctx_info->curr_state);
+		blk_end_request_all(rq, err);
+		goto out;
+	}
+
+	blk_end_request(rq, err, cmdq_req->data.bytes_xfered);
+
+out:
+
+	mmc_cmdq_clk_scaling_stop_busy(host, true, is_dcmd);
+	if (!test_bit(CMDQ_STATE_ERR, &ctx_info->curr_state)) {
+		mmc_host_clk_release(host);
+		wake_up(&ctx_info->wait);
+		mmc_put_card(host->card);
+	}
+
+	if (!ctx_info->active_reqs)
+		wake_up_interruptible(&host->cmdq_ctx.queue_empty_wq);
+
+	return;
+}
+
+/*
+ * Complete reqs from block layer softirq context
+ * Invoked in irq context
+ */
+void mmc_blk_cmdq_req_done(struct mmc_request *mrq)
+{
+	struct request *req = mrq->req;
+
+	blk_complete_request(req);
+}
+EXPORT_SYMBOL(mmc_blk_cmdq_req_done);
+
 static int mmc_blk_issue_rw_rq(struct mmc_queue *mq, struct request *rqc)
 {
 	struct mmc_blk_data *md = mq->data;
@@ -2261,7 +3672,7 @@
 		areq = mmc_start_req(card->host, areq, (int *) &status);
 		if (!areq) {
 			if (status == MMC_BLK_NEW_REQUEST)
-				mq->flags |= MMC_QUEUE_NEW_REQUEST;
+				set_bit(MMC_QUEUE_NEW_REQUEST, &mq->flags);
 			return 0;
 		}
 
@@ -2311,11 +3722,12 @@
 			break;
 		case MMC_BLK_RETRY:
 			retune_retry_done = brq->retune_retry_done;
-			if (retry++ < 5)
+			if (retry++ < MMC_BLK_MAX_RETRIES)
 				break;
 			/* Fall through */
 		case MMC_BLK_ABORT:
-			if (!mmc_blk_reset(md, card->host, type))
+			if (!mmc_blk_reset(md, card->host, type) &&
+				(retry++ < (MMC_BLK_MAX_RETRIES + 1)))
 				break;
 			goto cmd_abort;
 		case MMC_BLK_DATA_ERR: {
@@ -2408,6 +3820,132 @@
 	return 0;
 }
 
+static inline int mmc_blk_cmdq_part_switch(struct mmc_card *card,
+				      struct mmc_blk_data *md)
+{
+	struct mmc_blk_data *main_md = mmc_get_drvdata(card);
+	struct mmc_host *host = card->host;
+	struct mmc_cmdq_context_info *ctx = &host->cmdq_ctx;
+	u8 part_config = card->ext_csd.part_config;
+
+	if ((main_md->part_curr == md->part_type) &&
+	    (card->part_curr == md->part_type))
+		return 0;
+
+	WARN_ON(!((card->host->caps2 & MMC_CAP2_CMD_QUEUE) &&
+		 card->ext_csd.cmdq_support &&
+		 (md->flags & MMC_BLK_CMD_QUEUE)));
+
+	if (!test_bit(CMDQ_STATE_HALT, &ctx->curr_state))
+		WARN_ON(mmc_cmdq_halt(host, true));
+
+	/* disable CQ mode in card */
+	if (mmc_card_cmdq(card)) {
+		WARN_ON(mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+				 EXT_CSD_CMDQ, 0,
+				  card->ext_csd.generic_cmd6_time));
+		mmc_card_clr_cmdq(card);
+	}
+
+	part_config &= ~EXT_CSD_PART_CONFIG_ACC_MASK;
+	part_config |= md->part_type;
+
+	WARN_ON(mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+			 EXT_CSD_PART_CONFIG, part_config,
+			  card->ext_csd.part_time));
+
+	card->ext_csd.part_config = part_config;
+	card->part_curr = md->part_type;
+
+	main_md->part_curr = md->part_type;
+
+	WARN_ON(mmc_blk_cmdq_switch(card, md, true));
+	WARN_ON(mmc_cmdq_halt(host, false));
+
+	return 0;
+}
+
+static int mmc_blk_cmdq_issue_rq(struct mmc_queue *mq, struct request *req)
+{
+	int ret;
+	struct mmc_blk_data *md = mq->data;
+	struct mmc_card *card = md->queue.card;
+
+	mmc_get_card(card);
+
+	if (!card->host->cmdq_ctx.active_reqs && mmc_card_doing_bkops(card)) {
+		ret = mmc_cmdq_halt(card->host, true);
+		if (ret)
+			goto out;
+		ret = mmc_stop_bkops(card);
+		if (ret) {
+			pr_err("%s: %s: mmc_stop_bkops failed %d\n",
+					md->disk->disk_name, __func__, ret);
+			goto out;
+		}
+		ret = mmc_cmdq_halt(card->host, false);
+		if (ret)
+			goto out;
+	}
+
+	ret = mmc_blk_cmdq_part_switch(card, md);
+	if (ret) {
+		pr_err("%s: %s: partition switch failed %d\n",
+				md->disk->disk_name, __func__, ret);
+		goto out;
+	}
+
+	if (req) {
+		struct mmc_host *host = card->host;
+		struct mmc_cmdq_context_info *ctx = &host->cmdq_ctx;
+
+		if ((req_op(req) == REQ_OP_FLUSH || req_op(req) ==  REQ_OP_DISCARD) &&
+		    (card->quirks & MMC_QUIRK_CMDQ_EMPTY_BEFORE_DCMD) &&
+		    ctx->active_small_sector_read_reqs) {
+			ret = wait_event_interruptible(ctx->queue_empty_wq,
+						      !ctx->active_reqs);
+			if (ret) {
+				pr_err("%s: failed while waiting for the CMDQ to be empty %s err (%d)\n",
+					mmc_hostname(host),
+					__func__, ret);
+				BUG_ON(1);
+			}
+			/* clear the counter now */
+			ctx->active_small_sector_read_reqs = 0;
+			/*
+			 * If there were small sector (less than 8 sectors) read
+			 * operations in progress then we have to wait for the
+			 * outstanding requests to finish and should also have
+			 * atleast 6 microseconds delay before queuing the DCMD
+			 * request.
+			 */
+			udelay(MMC_QUIRK_CMDQ_DELAY_BEFORE_DCMD);
+		}
+
+		if (req_op(req) == REQ_OP_DISCARD) {
+			ret = mmc_blk_cmdq_issue_discard_rq(mq, req);
+		} else if (req_op(req) == REQ_OP_SECURE_ERASE) {
+			if (!(card->quirks & MMC_QUIRK_SEC_ERASE_TRIM_BROKEN))
+				ret = mmc_blk_cmdq_issue_secdiscard_rq(mq, req);
+			else
+				ret = mmc_blk_cmdq_issue_discard_rq(mq, req);
+		} else if (req_op(req) == REQ_OP_FLUSH) {
+			ret = mmc_blk_cmdq_issue_flush_rq(mq, req);
+		} else {
+			ret = mmc_blk_cmdq_issue_rw_rq(mq, req);
+		}
+	}
+
+	return ret;
+
+out:
+	if (req)
+		blk_end_request_all(req, ret);
+	mmc_put_card(card);
+
+	return ret;
+}
+
 int mmc_blk_issue_rq(struct mmc_queue *mq, struct request *req)
 {
 	int ret;
@@ -2415,14 +3953,33 @@
 	struct mmc_card *card = md->queue.card;
 	struct mmc_host *host = card->host;
 	unsigned long flags;
+	unsigned int cmd_flags = req ? req->cmd_flags : 0;
 	bool req_is_special = mmc_req_is_special(req);
+	int err;
 
-	if (req && !mq->mqrq_prev->req)
+	if (req && !mq->mqrq_prev->req) {
 		/* claim host only for the first request */
 		mmc_get_card(card);
 
+		if (mmc_card_doing_bkops(host->card)) {
+			ret = mmc_stop_bkops(host->card);
+			if (ret)
+				goto out;
+		}
+	}
+
 	ret = mmc_blk_part_switch(card, md);
+
 	if (ret) {
+		err = mmc_blk_reset(md, card->host, MMC_BLK_PARTSWITCH);
+		if (!err) {
+			pr_err("%s: mmc_blk_reset(MMC_BLK_PARTSWITCH) succeeded.\n",
+					mmc_hostname(host));
+			mmc_blk_reset_success(md, MMC_BLK_PARTSWITCH);
+		} else
+			pr_err("%s: mmc_blk_reset(MMC_BLK_PARTSWITCH) failed.\n",
+				mmc_hostname(host));
+
 		if (req) {
 			blk_end_request_all(req, -EIO);
 		}
@@ -2430,7 +3987,9 @@
 		goto out;
 	}
 
-	mq->flags &= ~MMC_QUEUE_NEW_REQUEST;
+	mmc_blk_write_packing_control(mq, req);
+
+	clear_bit(MMC_QUEUE_NEW_REQUEST, &mq->flags);
 	if (req && req_op(req) == REQ_OP_DISCARD) {
 		/* complete ongoing async transfer before issuing discard */
 		if (card->host->areq)
@@ -2440,8 +3999,12 @@
 		/* complete ongoing async transfer before issuing secure erase*/
 		if (card->host->areq)
 			mmc_blk_issue_rw_rq(mq, NULL);
-		ret = mmc_blk_issue_secdiscard_rq(mq, req);
-	} else if (req && req_op(req) == REQ_OP_FLUSH) {
+		if (!(card->quirks & MMC_QUIRK_SEC_ERASE_TRIM_BROKEN))
+			ret = mmc_blk_issue_secdiscard_rq(mq, req);
+		else
+			ret = mmc_blk_issue_discard_rq(mq, req);
+	} else if ((req && req_op(req) == REQ_OP_FLUSH) ||
+			(cmd_flags & REQ_BARRIER)) {
 		/* complete ongoing async transfer before issuing flush */
 		if (card->host->areq)
 			mmc_blk_issue_rw_rq(mq, NULL);
@@ -2456,7 +4019,8 @@
 	}
 
 out:
-	if ((!req && !(mq->flags & MMC_QUEUE_NEW_REQUEST)) || req_is_special)
+	if ((!req && !(test_bit(MMC_QUEUE_NEW_REQUEST, &mq->flags))) ||
+		req_is_special)
 		/*
 		 * Release host when there are no more requests
 		 * and after special request(discard, flush) is done.
@@ -2525,7 +4089,7 @@
 	INIT_LIST_HEAD(&md->part);
 	md->usage = 1;
 
-	ret = mmc_init_queue(&md->queue, card, &md->lock, subname);
+	ret = mmc_init_queue(&md->queue, card, NULL, subname, area_type);
 	if (ret)
 		goto err_putdisk;
 
@@ -2581,7 +4145,16 @@
 		blk_queue_write_cache(md->queue.queue, true, true);
 	}
 
-	if (mmc_card_mmc(card) &&
+	if (card->cmdq_init) {
+		md->flags |= MMC_BLK_CMD_QUEUE;
+		md->queue.cmdq_complete_fn = mmc_blk_cmdq_complete_rq;
+		md->queue.cmdq_issue_fn = mmc_blk_cmdq_issue_rq;
+		md->queue.cmdq_error_fn = mmc_blk_cmdq_err;
+		md->queue.cmdq_req_timed_out = mmc_blk_cmdq_req_timed_out;
+		md->queue.cmdq_shutdown = mmc_blk_cmdq_shutdown;
+	}
+
+	if (mmc_card_mmc(card) && !card->cmdq_init &&
 	    (area_type == MMC_BLK_DATA_AREA_MAIN) &&
 	    (md->flags & MMC_BLK_CMD23) &&
 	    card->ext_csd.packed_event_en) {
@@ -2694,6 +4267,10 @@
 		mmc_cleanup_queue(&md->queue);
 		if (md->flags & MMC_BLK_PACKED_CMD)
 			mmc_packed_clean(&md->queue);
+		if (md->flags & MMC_BLK_CMD_QUEUE)
+			mmc_cmdq_clean(&md->queue, card);
+		device_remove_file(disk_to_dev(md->disk),
+				   &md->num_wr_reqs_to_start_packing);
 		if (md->disk->flags & GENHD_FL_UP) {
 			device_remove_file(disk_to_dev(md->disk), &md->force_ro);
 			if ((md->area_type & MMC_BLK_DATA_AREA_BOOT) &&
@@ -2781,8 +4358,37 @@
 		if (ret)
 			goto power_ro_lock_fail;
 	}
+
+	md->num_wr_reqs_to_start_packing.show =
+		num_wr_reqs_to_start_packing_show;
+	md->num_wr_reqs_to_start_packing.store =
+		num_wr_reqs_to_start_packing_store;
+	sysfs_attr_init(&md->num_wr_reqs_to_start_packing.attr);
+	md->num_wr_reqs_to_start_packing.attr.name =
+		"num_wr_reqs_to_start_packing";
+	md->num_wr_reqs_to_start_packing.attr.mode = S_IRUGO | S_IWUSR;
+	ret = device_create_file(disk_to_dev(md->disk),
+				 &md->num_wr_reqs_to_start_packing);
+	if (ret)
+		goto num_wr_reqs_to_start_packing_fail;
+
+	md->no_pack_for_random.show = no_pack_for_random_show;
+	md->no_pack_for_random.store = no_pack_for_random_store;
+	sysfs_attr_init(&md->no_pack_for_random.attr);
+	md->no_pack_for_random.attr.name = "no_pack_for_random";
+	md->no_pack_for_random.attr.mode = S_IRUGO | S_IWUSR;
+	ret = device_create_file(disk_to_dev(md->disk),
+				 &md->no_pack_for_random);
+	if (ret)
+		goto no_pack_for_random_fails;
+
 	return ret;
 
+no_pack_for_random_fails:
+	device_remove_file(disk_to_dev(md->disk),
+			   &md->num_wr_reqs_to_start_packing);
+num_wr_reqs_to_start_packing_fail:
+	device_remove_file(disk_to_dev(md->disk), &md->power_ro_lock);
 power_ro_lock_fail:
 #ifdef CONFIG_MMC_SIMULATE_MAX_SPEED
 	device_remove_file(disk_to_dev(md->disk), &dev_attr_cache_size);
@@ -2830,6 +4436,8 @@
 		  MMC_QUIRK_BLK_NO_CMD23),
 	MMC_FIXUP("MMC32G", CID_MANFID_TOSHIBA, CID_OEMID_ANY, add_quirk_mmc,
 		  MMC_QUIRK_BLK_NO_CMD23),
+	MMC_FIXUP(CID_NAME_ANY, CID_MANFID_TOSHIBA, CID_OEMID_ANY,
+		  add_quirk_mmc, MMC_QUIRK_CMDQ_EMPTY_BEFORE_DCMD),
 
 	/*
 	 * Some MMC cards need longer data read timeout than indicated in CSD.
@@ -2847,6 +4455,13 @@
 		  MMC_QUIRK_LONG_READ_TIME),
 
 	/*
+	 * Hynix eMMC cards need longer data read timeout than
+	 * indicated in CSD.
+	 */
+	MMC_FIXUP(CID_NAME_ANY, CID_MANFID_HYNIX, CID_OEMID_ANY, add_quirk_mmc,
+		  MMC_QUIRK_LONG_READ_TIME),
+
+	/*
 	 * On these Samsung MoviNAND parts, performing secure erase or
 	 * secure trim can result in unrecoverable corruption due to a
 	 * firmware bug.
@@ -2877,6 +4492,10 @@
 	MMC_FIXUP("V10016", CID_MANFID_KINGSTON, CID_OEMID_ANY, add_quirk_mmc,
 		  MMC_QUIRK_TRIM_BROKEN),
 
+	/* Some INAND MCP devices advertise incorrect timeout values */
+	MMC_FIXUP("SEM04G", 0x45, CID_OEMID_ANY, add_quirk_mmc,
+		  MMC_QUIRK_INAND_DATA_TIMEOUT),
+
 	END_FIXUP
 };
 
@@ -2916,7 +4535,8 @@
 			goto out;
 	}
 
-	pm_runtime_set_autosuspend_delay(&card->dev, 3000);
+	pm_runtime_use_autosuspend(&card->dev);
+	pm_runtime_set_autosuspend_delay(&card->dev, MMC_AUTOSUSPEND_DELAY_MS);
 	pm_runtime_use_autosuspend(&card->dev);
 
 	/*
@@ -2952,23 +4572,40 @@
 	dev_set_drvdata(&card->dev, NULL);
 }
 
-static int _mmc_blk_suspend(struct mmc_card *card)
+static int _mmc_blk_suspend(struct mmc_card *card, bool wait)
 {
 	struct mmc_blk_data *part_md;
 	struct mmc_blk_data *md = dev_get_drvdata(&card->dev);
+	int rc = 0;
 
 	if (md) {
-		mmc_queue_suspend(&md->queue);
+		rc = mmc_queue_suspend(&md->queue, wait);
+		if (rc)
+			goto out;
 		list_for_each_entry(part_md, &md->part, part) {
-			mmc_queue_suspend(&part_md->queue);
+			rc = mmc_queue_suspend(&part_md->queue, wait);
+			if (rc)
+				goto out_resume;
 		}
 	}
-	return 0;
+	goto out;
+
+ out_resume:
+	mmc_queue_resume(&md->queue);
+	list_for_each_entry(part_md, &md->part, part) {
+		mmc_queue_resume(&part_md->queue);
+	}
+ out:
+	return rc;
 }
 
 static void mmc_blk_shutdown(struct mmc_card *card)
 {
-	_mmc_blk_suspend(card);
+	_mmc_blk_suspend(card, 1);
+
+	/* send power off notification */
+	if (mmc_card_mmc(card))
+		mmc_send_pon(card);
 }
 
 #ifdef CONFIG_PM_SLEEP
@@ -2976,7 +4613,7 @@
 {
 	struct mmc_card *card = mmc_dev_to_card(dev);
 
-	return _mmc_blk_suspend(card);
+	return _mmc_blk_suspend(card, 0);
 }
 
 static int mmc_blk_resume(struct device *dev)
diff --git a/drivers/mmc/card/mmc_block_test.c b/drivers/mmc/card/mmc_block_test.c
new file mode 100644
index 0000000..967affa
--- /dev/null
+++ b/drivers/mmc/card/mmc_block_test.c
@@ -0,0 +1,2038 @@
+/* Copyright (c) 2012, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+/* MMC block test */
+
+#include <linux/module.h>
+#include <linux/blkdev.h>
+#include <linux/debugfs.h>
+#include <linux/mmc/card.h>
+#include <linux/mmc/host.h>
+#include <linux/delay.h>
+#include <linux/test-iosched.h>
+#include "queue.h"
+
+#define MODULE_NAME "mmc_block_test"
+#define TEST_MAX_SECTOR_RANGE		(600*1024*1024) /* 600 MB */
+#define TEST_MAX_BIOS_PER_REQ		120
+#define CMD23_PACKED_BIT	(1 << 30)
+#define LARGE_PRIME_1	1103515367
+#define LARGE_PRIME_2	35757
+#define PACKED_HDR_VER_MASK 0x000000FF
+#define PACKED_HDR_RW_MASK 0x0000FF00
+#define PACKED_HDR_NUM_REQS_MASK 0x00FF0000
+#define PACKED_HDR_BITS_16_TO_29_SET 0x3FFF0000
+
+#define test_pr_debug(fmt, args...) pr_debug("%s: "fmt"\n", MODULE_NAME, args)
+#define test_pr_info(fmt, args...) pr_info("%s: "fmt"\n", MODULE_NAME, args)
+#define test_pr_err(fmt, args...) pr_err("%s: "fmt"\n", MODULE_NAME, args)
+
+enum is_random {
+	NON_RANDOM_TEST,
+	RANDOM_TEST,
+};
+
+enum mmc_block_test_testcases {
+	/* Start of send write packing test group */
+	SEND_WRITE_PACKING_MIN_TESTCASE,
+	TEST_STOP_DUE_TO_READ = SEND_WRITE_PACKING_MIN_TESTCASE,
+	TEST_STOP_DUE_TO_READ_AFTER_MAX_REQS,
+	TEST_STOP_DUE_TO_FLUSH,
+	TEST_STOP_DUE_TO_FLUSH_AFTER_MAX_REQS,
+	TEST_STOP_DUE_TO_EMPTY_QUEUE,
+	TEST_STOP_DUE_TO_MAX_REQ_NUM,
+	TEST_STOP_DUE_TO_THRESHOLD,
+	SEND_WRITE_PACKING_MAX_TESTCASE = TEST_STOP_DUE_TO_THRESHOLD,
+
+	/* Start of err check test group */
+	ERR_CHECK_MIN_TESTCASE,
+	TEST_RET_ABORT = ERR_CHECK_MIN_TESTCASE,
+	TEST_RET_PARTIAL_FOLLOWED_BY_SUCCESS,
+	TEST_RET_PARTIAL_FOLLOWED_BY_ABORT,
+	TEST_RET_PARTIAL_MULTIPLE_UNTIL_SUCCESS,
+	TEST_RET_PARTIAL_MAX_FAIL_IDX,
+	TEST_RET_RETRY,
+	TEST_RET_CMD_ERR,
+	TEST_RET_DATA_ERR,
+	ERR_CHECK_MAX_TESTCASE = TEST_RET_DATA_ERR,
+
+	/* Start of send invalid test group */
+	INVALID_CMD_MIN_TESTCASE,
+	TEST_HDR_INVALID_VERSION = INVALID_CMD_MIN_TESTCASE,
+	TEST_HDR_WRONG_WRITE_CODE,
+	TEST_HDR_INVALID_RW_CODE,
+	TEST_HDR_DIFFERENT_ADDRESSES,
+	TEST_HDR_REQ_NUM_SMALLER_THAN_ACTUAL,
+	TEST_HDR_REQ_NUM_LARGER_THAN_ACTUAL,
+	TEST_HDR_CMD23_PACKED_BIT_SET,
+	TEST_CMD23_MAX_PACKED_WRITES,
+	TEST_CMD23_ZERO_PACKED_WRITES,
+	TEST_CMD23_PACKED_BIT_UNSET,
+	TEST_CMD23_REL_WR_BIT_SET,
+	TEST_CMD23_BITS_16TO29_SET,
+	TEST_CMD23_HDR_BLK_NOT_IN_COUNT,
+	INVALID_CMD_MAX_TESTCASE = TEST_CMD23_HDR_BLK_NOT_IN_COUNT,
+
+	/*
+	 * Start of packing control test group.
+	 * in these next testcases the abbreviation FB = followed by
+	 */
+	PACKING_CONTROL_MIN_TESTCASE,
+	TEST_PACKING_EXP_ONE_OVER_TRIGGER_FB_READ =
+				PACKING_CONTROL_MIN_TESTCASE,
+	TEST_PACKING_EXP_N_OVER_TRIGGER,
+	TEST_PACKING_EXP_N_OVER_TRIGGER_FB_READ,
+	TEST_PACKING_EXP_N_OVER_TRIGGER_FLUSH_N,
+	TEST_PACKING_EXP_THRESHOLD_OVER_TRIGGER,
+	TEST_PACKING_NOT_EXP_LESS_THAN_TRIGGER_REQUESTS,
+	TEST_PACKING_NOT_EXP_TRIGGER_REQUESTS,
+	TEST_PACKING_NOT_EXP_TRIGGER_READ_TRIGGER,
+	TEST_PACKING_NOT_EXP_TRIGGER_FLUSH_TRIGGER,
+	TEST_PACK_MIX_PACKED_NO_PACKED_PACKED,
+	TEST_PACK_MIX_NO_PACKED_PACKED_NO_PACKED,
+	PACKING_CONTROL_MAX_TESTCASE = TEST_PACK_MIX_NO_PACKED_PACKED_NO_PACKED,
+};
+
+enum mmc_block_test_group {
+	TEST_NO_GROUP,
+	TEST_GENERAL_GROUP,
+	TEST_SEND_WRITE_PACKING_GROUP,
+	TEST_ERR_CHECK_GROUP,
+	TEST_SEND_INVALID_GROUP,
+	TEST_PACKING_CONTROL_GROUP,
+};
+
+struct mmc_block_test_debug {
+	struct dentry *send_write_packing_test;
+	struct dentry *err_check_test;
+	struct dentry *send_invalid_packed_test;
+	struct dentry *random_test_seed;
+	struct dentry *packing_control_test;
+};
+
+struct mmc_block_test_data {
+	/* The number of write requests that the test will issue */
+	int num_requests;
+	/* The expected write packing statistics for the current test */
+	struct mmc_wr_pack_stats exp_packed_stats;
+	/*
+	 * A user-defined seed for random choices of number of bios written in
+	 * a request, and of number of requests issued in a test
+	 * This field is randomly updated after each use
+	 */
+	unsigned int random_test_seed;
+	/* A retry counter used in err_check tests */
+	int err_check_counter;
+	/* Can be one of the values of enum test_group */
+	enum mmc_block_test_group test_group;
+	/*
+	 * Indicates if the current testcase is running with random values of
+	 * num_requests and num_bios (in each request)
+	 */
+	int is_random;
+	/* Data structure for debugfs dentrys */
+	struct mmc_block_test_debug debug;
+	/*
+	 * Data structure containing individual test information, including
+	 * self-defined specific data
+	 */
+	struct test_info test_info;
+	/* mmc block device test */
+	struct blk_dev_test_type bdt;
+};
+
+static struct mmc_block_test_data *mbtd;
+
+void print_mmc_packing_stats(struct mmc_card *card)
+{
+	int i;
+	int max_num_of_packed_reqs = 0;
+
+	if ((!card) || (!card->wr_pack_stats.packing_events))
+		return;
+
+	max_num_of_packed_reqs = card->ext_csd.max_packed_writes;
+
+	spin_lock(&card->wr_pack_stats.lock);
+
+	pr_info("%s: write packing statistics:\n",
+		mmc_hostname(card->host));
+
+	for (i = 1 ; i <= max_num_of_packed_reqs ; ++i) {
+		if (card->wr_pack_stats.packing_events[i] != 0)
+			pr_info("%s: Packed %d reqs - %d times\n",
+				mmc_hostname(card->host), i,
+				card->wr_pack_stats.packing_events[i]);
+	}
+
+	pr_info("%s: stopped packing due to the following reasons:\n",
+		mmc_hostname(card->host));
+
+	if (card->wr_pack_stats.pack_stop_reason[EXCEEDS_SEGMENTS])
+		pr_info("%s: %d times: exceedmax num of segments\n",
+			mmc_hostname(card->host),
+			card->wr_pack_stats.pack_stop_reason[EXCEEDS_SEGMENTS]);
+	if (card->wr_pack_stats.pack_stop_reason[EXCEEDS_SECTORS])
+		pr_info("%s: %d times: exceeding the max num of sectors\n",
+			mmc_hostname(card->host),
+			card->wr_pack_stats.pack_stop_reason[EXCEEDS_SECTORS]);
+	if (card->wr_pack_stats.pack_stop_reason[WRONG_DATA_DIR])
+		pr_info("%s: %d times: wrong data direction\n",
+			mmc_hostname(card->host),
+			card->wr_pack_stats.pack_stop_reason[WRONG_DATA_DIR]);
+	if (card->wr_pack_stats.pack_stop_reason[FLUSH_OR_DISCARD])
+		pr_info("%s: %d times: flush or discard\n",
+			mmc_hostname(card->host),
+			card->wr_pack_stats.pack_stop_reason[FLUSH_OR_DISCARD]);
+	if (card->wr_pack_stats.pack_stop_reason[EMPTY_QUEUE])
+		pr_info("%s: %d times: empty queue\n",
+			mmc_hostname(card->host),
+			card->wr_pack_stats.pack_stop_reason[EMPTY_QUEUE]);
+	if (card->wr_pack_stats.pack_stop_reason[REL_WRITE])
+		pr_info("%s: %d times: rel write\n",
+			mmc_hostname(card->host),
+			card->wr_pack_stats.pack_stop_reason[REL_WRITE]);
+	if (card->wr_pack_stats.pack_stop_reason[THRESHOLD])
+		pr_info("%s: %d times: Threshold\n",
+			mmc_hostname(card->host),
+			card->wr_pack_stats.pack_stop_reason[THRESHOLD]);
+
+	spin_unlock(&card->wr_pack_stats.lock);
+}
+
+/*
+ * A callback assigned to the packed_test_fn field.
+ * Called from block layer in mmc_blk_packed_hdr_wrq_prep.
+ * Here we alter the packed header or CMD23 in order to send an invalid
+ * packed command to the card.
+ */
+static void test_invalid_packed_cmd(struct request_queue *q,
+				    struct mmc_queue_req *mqrq)
+{
+	struct mmc_queue *mq = q->queuedata;
+	u32 *packed_cmd_hdr = mqrq->packed->cmd_hdr;
+	struct request *req = mqrq->req;
+	struct request *second_rq;
+	struct test_request *test_rq;
+	struct mmc_blk_request *brq = &mqrq->brq;
+	int num_requests;
+	int max_packed_reqs;
+
+	if (!mq) {
+		test_pr_err("%s: NULL mq", __func__);
+		return;
+	}
+
+	test_rq = (struct test_request *)req->elv.priv[0];
+	if (!test_rq) {
+		test_pr_err("%s: NULL test_rq", __func__);
+		return;
+	}
+	max_packed_reqs = mq->card->ext_csd.max_packed_writes;
+
+	switch (mbtd->test_info.testcase) {
+	case TEST_HDR_INVALID_VERSION:
+		test_pr_info("%s: set invalid header version", __func__);
+		/* Put 0 in header version field (1 byte, offset 0 in header) */
+		packed_cmd_hdr[0] = packed_cmd_hdr[0] & ~PACKED_HDR_VER_MASK;
+		break;
+	case TEST_HDR_WRONG_WRITE_CODE:
+		test_pr_info("%s: wrong write code", __func__);
+		/* Set R/W field with R value (1 byte, offset 1 in header) */
+		packed_cmd_hdr[0] = packed_cmd_hdr[0] & ~PACKED_HDR_RW_MASK;
+		packed_cmd_hdr[0] = packed_cmd_hdr[0] | 0x00000100;
+		break;
+	case TEST_HDR_INVALID_RW_CODE:
+		test_pr_info("%s: invalid r/w code", __func__);
+		/* Set R/W field with invalid value */
+		packed_cmd_hdr[0] = packed_cmd_hdr[0] & ~PACKED_HDR_RW_MASK;
+		packed_cmd_hdr[0] = packed_cmd_hdr[0] | 0x00000400;
+		break;
+	case TEST_HDR_DIFFERENT_ADDRESSES:
+		test_pr_info("%s: different addresses", __func__);
+		second_rq = list_entry(req->queuelist.next, struct request,
+				queuelist);
+		test_pr_info("%s: test_rq->sector=%ld, second_rq->sector=%ld",
+			      __func__, (long)req->__sector,
+			     (long)second_rq->__sector);
+		/*
+		 * Put start sector of second write request in the first write
+		 * request's cmd25 argument in the packed header
+		 */
+		packed_cmd_hdr[3] = second_rq->__sector;
+		break;
+	case TEST_HDR_REQ_NUM_SMALLER_THAN_ACTUAL:
+		test_pr_info("%s: request num smaller than actual" , __func__);
+		num_requests = (packed_cmd_hdr[0] & PACKED_HDR_NUM_REQS_MASK)
+									>> 16;
+		/* num of entries is decremented by 1 */
+		num_requests = (num_requests - 1) << 16;
+		/*
+		 * Set number of requests field in packed write header to be
+		 * smaller than the actual number (1 byte, offset 2 in header)
+		 */
+		packed_cmd_hdr[0] = (packed_cmd_hdr[0] &
+				     ~PACKED_HDR_NUM_REQS_MASK) + num_requests;
+		break;
+	case TEST_HDR_REQ_NUM_LARGER_THAN_ACTUAL:
+		test_pr_info("%s: request num larger than actual" , __func__);
+		num_requests = (packed_cmd_hdr[0] & PACKED_HDR_NUM_REQS_MASK)
+									>> 16;
+		/* num of entries is incremented by 1 */
+		num_requests = (num_requests + 1) << 16;
+		/*
+		 * Set number of requests field in packed write header to be
+		 * larger than the actual number (1 byte, offset 2 in header).
+		 */
+		packed_cmd_hdr[0] = (packed_cmd_hdr[0] &
+				     ~PACKED_HDR_NUM_REQS_MASK) + num_requests;
+		break;
+	case TEST_HDR_CMD23_PACKED_BIT_SET:
+		test_pr_info("%s: header CMD23 packed bit set" , __func__);
+		/*
+		 * Set packed bit (bit 30) in cmd23 argument of first and second
+		 * write requests in packed write header.
+		 * These are located at bytes 2 and 4 in packed write header
+		 */
+		packed_cmd_hdr[2] = packed_cmd_hdr[2] | CMD23_PACKED_BIT;
+		packed_cmd_hdr[4] = packed_cmd_hdr[4] | CMD23_PACKED_BIT;
+		break;
+	case TEST_CMD23_MAX_PACKED_WRITES:
+		test_pr_info("%s: CMD23 request num > max_packed_reqs",
+			      __func__);
+		/*
+		 * Set the individual packed cmd23 request num to
+		 * max_packed_reqs + 1
+		 */
+		brq->sbc.arg = MMC_CMD23_ARG_PACKED | (max_packed_reqs + 1);
+		break;
+	case TEST_CMD23_ZERO_PACKED_WRITES:
+		test_pr_info("%s: CMD23 request num = 0", __func__);
+		/* Set the individual packed cmd23 request num to zero */
+		brq->sbc.arg = MMC_CMD23_ARG_PACKED;
+		break;
+	case TEST_CMD23_PACKED_BIT_UNSET:
+		test_pr_info("%s: CMD23 packed bit unset", __func__);
+		/*
+		 * Set the individual packed cmd23 packed bit to 0,
+		 *  although there is a packed write request
+		 */
+		brq->sbc.arg &= ~CMD23_PACKED_BIT;
+		break;
+	case TEST_CMD23_REL_WR_BIT_SET:
+		test_pr_info("%s: CMD23 REL WR bit set", __func__);
+		/* Set the individual packed cmd23 reliable write bit */
+		brq->sbc.arg = MMC_CMD23_ARG_PACKED | MMC_CMD23_ARG_REL_WR;
+		break;
+	case TEST_CMD23_BITS_16TO29_SET:
+		test_pr_info("%s: CMD23 bits [16-29] set", __func__);
+		brq->sbc.arg = MMC_CMD23_ARG_PACKED |
+			PACKED_HDR_BITS_16_TO_29_SET;
+		break;
+	case TEST_CMD23_HDR_BLK_NOT_IN_COUNT:
+		test_pr_info("%s: CMD23 hdr not in block count", __func__);
+		brq->sbc.arg = MMC_CMD23_ARG_PACKED |
+		((rq_data_dir(req) == READ) ? 0 : mqrq->packed->blocks);
+		break;
+	default:
+		test_pr_err("%s: unexpected testcase %d",
+			__func__, mbtd->test_info.testcase);
+		break;
+	}
+}
+
+/*
+ * A callback assigned to the err_check_fn field of the mmc_request by the
+ * MMC/card/block layer.
+ * Called upon request completion by the MMC/core layer.
+ * Here we emulate an error return value from the card.
+ */
+static int test_err_check(struct mmc_card *card, struct mmc_async_req *areq)
+{
+	struct mmc_queue_req *mq_rq = container_of(areq, struct mmc_queue_req,
+			mmc_active);
+	struct request_queue *req_q = test_iosched_get_req_queue();
+	struct mmc_queue *mq;
+	int max_packed_reqs;
+	int ret = 0;
+
+	if (req_q)
+		mq = req_q->queuedata;
+	else {
+		test_pr_err("%s: NULL request_queue", __func__);
+		return 0;
+	}
+
+	if (!mq) {
+		test_pr_err("%s: %s: NULL mq", __func__,
+			mmc_hostname(card->host));
+		return 0;
+	}
+
+	max_packed_reqs = mq->card->ext_csd.max_packed_writes;
+
+	if (!mq_rq) {
+		test_pr_err("%s: %s: NULL mq_rq", __func__,
+			mmc_hostname(card->host));
+		return 0;
+	}
+
+	switch (mbtd->test_info.testcase) {
+	case TEST_RET_ABORT:
+		test_pr_info("%s: return abort", __func__);
+		ret = MMC_BLK_ABORT;
+		break;
+	case TEST_RET_PARTIAL_FOLLOWED_BY_SUCCESS:
+		test_pr_info("%s: return partial followed by success",
+			      __func__);
+		/*
+		 * Since in this testcase num_requests is always >= 2,
+		 * we can be sure that packed_fail_idx is always >= 1
+		 */
+		mq_rq->packed->idx_failure = (mbtd->num_requests / 2);
+		test_pr_info("%s: packed_fail_idx = %d"
+			, __func__, mq_rq->packed->idx_failure);
+		mq->err_check_fn = NULL;
+		ret = MMC_BLK_PARTIAL;
+		break;
+	case TEST_RET_PARTIAL_FOLLOWED_BY_ABORT:
+		if (!mbtd->err_check_counter) {
+			test_pr_info("%s: return partial followed by abort",
+				      __func__);
+			mbtd->err_check_counter++;
+			/*
+			 * Since in this testcase num_requests is always >= 3,
+			 * we have that packed_fail_idx is always >= 1
+			 */
+			mq_rq->packed->idx_failure = (mbtd->num_requests / 2);
+			test_pr_info("%s: packed_fail_idx = %d"
+				, __func__, mq_rq->packed->idx_failure);
+			ret = MMC_BLK_PARTIAL;
+			break;
+		}
+		mbtd->err_check_counter = 0;
+		mq->err_check_fn = NULL;
+		ret = MMC_BLK_ABORT;
+		break;
+	case TEST_RET_PARTIAL_MULTIPLE_UNTIL_SUCCESS:
+		test_pr_info("%s: return partial multiple until success",
+			     __func__);
+		if (++mbtd->err_check_counter >= (mbtd->num_requests)) {
+			mq->err_check_fn = NULL;
+			mbtd->err_check_counter = 0;
+			ret = MMC_BLK_PARTIAL;
+			break;
+		}
+		mq_rq->packed->idx_failure = 1;
+		ret = MMC_BLK_PARTIAL;
+		break;
+	case TEST_RET_PARTIAL_MAX_FAIL_IDX:
+		test_pr_info("%s: return partial max fail_idx", __func__);
+		mq_rq->packed->idx_failure = max_packed_reqs - 1;
+		mq->err_check_fn = NULL;
+		ret = MMC_BLK_PARTIAL;
+		break;
+	case TEST_RET_RETRY:
+		test_pr_info("%s: return retry", __func__);
+		ret = MMC_BLK_RETRY;
+		break;
+	case TEST_RET_CMD_ERR:
+		test_pr_info("%s: return cmd err", __func__);
+		ret = MMC_BLK_CMD_ERR;
+		break;
+	case TEST_RET_DATA_ERR:
+		test_pr_info("%s: return data err", __func__);
+		ret = MMC_BLK_DATA_ERR;
+		break;
+	default:
+		test_pr_err("%s: unexpected testcase %d",
+			__func__, mbtd->test_info.testcase);
+	}
+
+	return ret;
+}
+
+/*
+ * This is a specific implementation for the get_test_case_str_fn function
+ * pointer in the test_info data structure. Given a valid test_data instance,
+ * the function returns a string resembling the test name, based on the testcase
+ */
+static char *get_test_case_str(struct test_data *td)
+{
+	if (!td) {
+		test_pr_err("%s: NULL td", __func__);
+		return NULL;
+	}
+
+	switch (td->test_info.testcase) {
+	case TEST_STOP_DUE_TO_FLUSH:
+		return " stop due to flush";
+	case TEST_STOP_DUE_TO_FLUSH_AFTER_MAX_REQS:
+		return " stop due to flush after max-1 reqs";
+	case TEST_STOP_DUE_TO_READ:
+		return " stop due to read";
+	case TEST_STOP_DUE_TO_READ_AFTER_MAX_REQS:
+		return "Test stop due to read after max-1 reqs";
+	case TEST_STOP_DUE_TO_EMPTY_QUEUE:
+		return "Test stop due to empty queue";
+	case TEST_STOP_DUE_TO_MAX_REQ_NUM:
+		return "Test stop due to max req num";
+	case TEST_STOP_DUE_TO_THRESHOLD:
+		return "Test stop due to exceeding threshold";
+	case TEST_RET_ABORT:
+		return "Test err_check return abort";
+	case TEST_RET_PARTIAL_FOLLOWED_BY_SUCCESS:
+		return "Test err_check return partial followed by success";
+	case TEST_RET_PARTIAL_FOLLOWED_BY_ABORT:
+		return "Test err_check return partial followed by abort";
+	case TEST_RET_PARTIAL_MULTIPLE_UNTIL_SUCCESS:
+		return "Test err_check return partial multiple until success";
+	case TEST_RET_PARTIAL_MAX_FAIL_IDX:
+		return "Test err_check return partial max fail index";
+	case TEST_RET_RETRY:
+		return "Test err_check return retry";
+	case TEST_RET_CMD_ERR:
+		return "Test err_check return cmd error";
+	case TEST_RET_DATA_ERR:
+		return "Test err_check return data error";
+	case TEST_HDR_INVALID_VERSION:
+		return "Test invalid - wrong header version";
+	case TEST_HDR_WRONG_WRITE_CODE:
+		return "Test invalid - wrong write code";
+	case TEST_HDR_INVALID_RW_CODE:
+		return "Test invalid - wrong R/W code";
+	case TEST_HDR_DIFFERENT_ADDRESSES:
+		return "Test invalid - header different addresses";
+	case TEST_HDR_REQ_NUM_SMALLER_THAN_ACTUAL:
+		return "Test invalid - header req num smaller than actual";
+	case TEST_HDR_REQ_NUM_LARGER_THAN_ACTUAL:
+		return "Test invalid - header req num larger than actual";
+	case TEST_HDR_CMD23_PACKED_BIT_SET:
+		return "Test invalid - header cmd23 packed bit set";
+	case TEST_CMD23_MAX_PACKED_WRITES:
+		return "Test invalid - cmd23 max packed writes";
+	case TEST_CMD23_ZERO_PACKED_WRITES:
+		return "Test invalid - cmd23 zero packed writes";
+	case TEST_CMD23_PACKED_BIT_UNSET:
+		return "Test invalid - cmd23 packed bit unset";
+	case TEST_CMD23_REL_WR_BIT_SET:
+		return "Test invalid - cmd23 rel wr bit set";
+	case TEST_CMD23_BITS_16TO29_SET:
+		return "Test invalid - cmd23 bits [16-29] set";
+	case TEST_CMD23_HDR_BLK_NOT_IN_COUNT:
+		return "Test invalid - cmd23 header block not in count";
+	case TEST_PACKING_EXP_N_OVER_TRIGGER:
+		return "\nTest packing control - pack n";
+	case TEST_PACKING_EXP_N_OVER_TRIGGER_FB_READ:
+		return "\nTest packing control - pack n followed by read";
+	case TEST_PACKING_EXP_N_OVER_TRIGGER_FLUSH_N:
+		return "\nTest packing control - pack n followed by flush";
+	case TEST_PACKING_EXP_ONE_OVER_TRIGGER_FB_READ:
+		return "\nTest packing control - pack one followed by read";
+	case TEST_PACKING_EXP_THRESHOLD_OVER_TRIGGER:
+		return "\nTest packing control - pack threshold";
+	case TEST_PACKING_NOT_EXP_LESS_THAN_TRIGGER_REQUESTS:
+		return "\nTest packing control - no packing";
+	case TEST_PACKING_NOT_EXP_TRIGGER_REQUESTS:
+		return "\nTest packing control - no packing, trigger requests";
+	case TEST_PACKING_NOT_EXP_TRIGGER_READ_TRIGGER:
+		return "\nTest packing control - no pack, trigger-read-trigger";
+	case TEST_PACKING_NOT_EXP_TRIGGER_FLUSH_TRIGGER:
+		return "\nTest packing control- no pack, trigger-flush-trigger";
+	case TEST_PACK_MIX_PACKED_NO_PACKED_PACKED:
+		return "\nTest packing control - mix: pack -> no pack -> pack";
+	case TEST_PACK_MIX_NO_PACKED_PACKED_NO_PACKED:
+		return "\nTest packing control - mix: no pack->pack->no pack";
+	default:
+		 return "Unknown testcase";
+	}
+
+	return NULL;
+}
+
+/*
+ * Compare individual testcase's statistics to the expected statistics:
+ * Compare stop reason and number of packing events
+ */
+static int check_wr_packing_statistics(struct test_data *td)
+{
+	struct mmc_wr_pack_stats *mmc_packed_stats;
+	struct mmc_queue *mq = td->req_q->queuedata;
+	int max_packed_reqs = mq->card->ext_csd.max_packed_writes;
+	int i;
+	struct mmc_card *card = mq->card;
+	struct mmc_wr_pack_stats expected_stats;
+	int *stop_reason;
+	int ret = 0;
+
+	if (!mq) {
+		test_pr_err("%s: NULL mq", __func__);
+		return -EINVAL;
+	}
+
+	expected_stats = mbtd->exp_packed_stats;
+
+	mmc_packed_stats = mmc_blk_get_packed_statistics(card);
+	if (!mmc_packed_stats) {
+		test_pr_err("%s: NULL mmc_packed_stats", __func__);
+		return -EINVAL;
+	}
+
+	if (!mmc_packed_stats->packing_events) {
+		test_pr_err("%s: NULL packing_events", __func__);
+		return -EINVAL;
+	}
+
+	spin_lock(&mmc_packed_stats->lock);
+
+	if (!mmc_packed_stats->enabled) {
+		test_pr_err("%s write packing statistics are not enabled",
+			     __func__);
+		ret = -EINVAL;
+		goto exit_err;
+	}
+
+	stop_reason = mmc_packed_stats->pack_stop_reason;
+
+	for (i = 1; i <= max_packed_reqs; ++i) {
+		if (mmc_packed_stats->packing_events[i] !=
+		    expected_stats.packing_events[i]) {
+			test_pr_err(
+			"%s: Wrong pack stats in index %d, got %d, expected %d",
+			__func__, i, mmc_packed_stats->packing_events[i],
+			       expected_stats.packing_events[i]);
+			if (td->fs_wr_reqs_during_test)
+				goto cancel_round;
+			ret = -EINVAL;
+			goto exit_err;
+		}
+	}
+
+	if (mmc_packed_stats->pack_stop_reason[EXCEEDS_SEGMENTS] !=
+	    expected_stats.pack_stop_reason[EXCEEDS_SEGMENTS]) {
+		test_pr_err(
+		"%s: Wrong pack stop reason EXCEEDS_SEGMENTS %d, expected %d",
+			__func__, stop_reason[EXCEEDS_SEGMENTS],
+		       expected_stats.pack_stop_reason[EXCEEDS_SEGMENTS]);
+		if (td->fs_wr_reqs_during_test)
+			goto cancel_round;
+		ret = -EINVAL;
+		goto exit_err;
+	}
+
+	if (mmc_packed_stats->pack_stop_reason[EXCEEDS_SECTORS] !=
+	    expected_stats.pack_stop_reason[EXCEEDS_SECTORS]) {
+		test_pr_err(
+		"%s: Wrong pack stop reason EXCEEDS_SECTORS %d, expected %d",
+			__func__, stop_reason[EXCEEDS_SECTORS],
+		       expected_stats.pack_stop_reason[EXCEEDS_SECTORS]);
+		if (td->fs_wr_reqs_during_test)
+			goto cancel_round;
+		ret = -EINVAL;
+		goto exit_err;
+	}
+
+	if (mmc_packed_stats->pack_stop_reason[WRONG_DATA_DIR] !=
+	    expected_stats.pack_stop_reason[WRONG_DATA_DIR]) {
+		test_pr_err(
+		"%s: Wrong pack stop reason WRONG_DATA_DIR %d, expected %d",
+		       __func__, stop_reason[WRONG_DATA_DIR],
+		       expected_stats.pack_stop_reason[WRONG_DATA_DIR]);
+		if (td->fs_wr_reqs_during_test)
+			goto cancel_round;
+		ret = -EINVAL;
+		goto exit_err;
+	}
+
+	if (mmc_packed_stats->pack_stop_reason[FLUSH_OR_DISCARD] !=
+	    expected_stats.pack_stop_reason[FLUSH_OR_DISCARD]) {
+		test_pr_err(
+		"%s: Wrong pack stop reason FLUSH_OR_DISCARD %d, expected %d",
+		       __func__, stop_reason[FLUSH_OR_DISCARD],
+		       expected_stats.pack_stop_reason[FLUSH_OR_DISCARD]);
+		if (td->fs_wr_reqs_during_test)
+			goto cancel_round;
+		ret = -EINVAL;
+		goto exit_err;
+	}
+
+	if (mmc_packed_stats->pack_stop_reason[EMPTY_QUEUE] !=
+	    expected_stats.pack_stop_reason[EMPTY_QUEUE]) {
+		test_pr_err(
+		"%s: Wrong pack stop reason EMPTY_QUEUE %d, expected %d",
+		       __func__, stop_reason[EMPTY_QUEUE],
+		       expected_stats.pack_stop_reason[EMPTY_QUEUE]);
+		if (td->fs_wr_reqs_during_test)
+			goto cancel_round;
+		ret = -EINVAL;
+		goto exit_err;
+	}
+
+	if (mmc_packed_stats->pack_stop_reason[REL_WRITE] !=
+	    expected_stats.pack_stop_reason[REL_WRITE]) {
+		test_pr_err(
+			"%s: Wrong pack stop reason REL_WRITE %d, expected %d",
+		       __func__, stop_reason[REL_WRITE],
+		       expected_stats.pack_stop_reason[REL_WRITE]);
+		if (td->fs_wr_reqs_during_test)
+			goto cancel_round;
+		ret = -EINVAL;
+		goto exit_err;
+	}
+
+exit_err:
+	spin_unlock(&mmc_packed_stats->lock);
+	if (ret && mmc_packed_stats->enabled)
+		print_mmc_packing_stats(card);
+	return ret;
+cancel_round:
+	spin_unlock(&mmc_packed_stats->lock);
+	test_iosched_set_ignore_round(true);
+	return 0;
+}
+
+/*
+ * Pseudo-randomly choose a seed based on the last seed, and update it in
+ * seed_number. then return seed_number (mod max_val), or min_val.
+ */
+static unsigned int pseudo_random_seed(unsigned int *seed_number,
+				       unsigned int min_val,
+				       unsigned int max_val)
+{
+	int ret = 0;
+
+	if (!seed_number)
+		return 0;
+
+	*seed_number = ((unsigned int)(((unsigned long)*seed_number *
+				(unsigned long)LARGE_PRIME_1) + LARGE_PRIME_2));
+	ret = (unsigned int)((*seed_number) % max_val);
+
+	return (ret > min_val ? ret : min_val);
+}
+
+/*
+ * Given a pseudo-random seed, find a pseudo-random num_of_bios.
+ * Make sure that num_of_bios is not larger than TEST_MAX_SECTOR_RANGE
+ */
+static void pseudo_rnd_num_of_bios(unsigned int *num_bios_seed,
+				   unsigned int *num_of_bios)
+{
+	do {
+		*num_of_bios = pseudo_random_seed(num_bios_seed, 1,
+						  TEST_MAX_BIOS_PER_REQ);
+		if (!(*num_of_bios))
+			*num_of_bios = 1;
+	} while ((*num_of_bios) * BIO_U32_SIZE * 4 > TEST_MAX_SECTOR_RANGE);
+}
+
+/* Add a single read request to the given td's request queue */
+static int prepare_request_add_read(struct test_data *td)
+{
+	int ret;
+	int start_sec;
+
+	if (td)
+		start_sec = td->start_sector;
+	else {
+		test_pr_err("%s: NULL td", __func__);
+		return 0;
+	}
+
+	test_pr_info("%s: Adding a read request, first req_id=%d", __func__,
+		     td->wr_rd_next_req_id);
+
+	ret = test_iosched_add_wr_rd_test_req(0, READ, start_sec, 2,
+					      TEST_PATTERN_5A, NULL);
+	if (ret) {
+		test_pr_err("%s: failed to add a read request", __func__);
+		return ret;
+	}
+
+	return 0;
+}
+
+/* Add a single flush request to the given td's request queue */
+static int prepare_request_add_flush(struct test_data *td)
+{
+	int ret;
+
+	if (!td) {
+		test_pr_err("%s: NULL td", __func__);
+		return 0;
+	}
+
+	test_pr_info("%s: Adding a flush request, first req_id=%d", __func__,
+		     td->unique_next_req_id);
+	ret = test_iosched_add_unique_test_req(0, REQ_UNIQUE_FLUSH,
+				  0, 0, NULL);
+	if (ret) {
+		test_pr_err("%s: failed to add a flush request", __func__);
+		return ret;
+	}
+
+	return ret;
+}
+
+/*
+ * Add num_requets amount of write requests to the given td's request queue.
+ * If random test mode is chosen we pseudo-randomly choose the number of bios
+ * for each write request, otherwise add between 1 to 5 bio per request.
+ */
+static int prepare_request_add_write_reqs(struct test_data *td,
+					  int num_requests, int is_err_expected,
+					  int is_random)
+{
+	int i;
+	unsigned int start_sec;
+	int num_bios;
+	int ret = 0;
+	unsigned int *bio_seed = &mbtd->random_test_seed;
+
+	if (td)
+		start_sec = td->start_sector;
+	else {
+		test_pr_err("%s: NULL td", __func__);
+		return ret;
+	}
+
+	test_pr_info("%s: Adding %d write requests, first req_id=%d", __func__,
+		     num_requests, td->wr_rd_next_req_id);
+
+	for (i = 1; i <= num_requests; i++) {
+		start_sec = td->start_sector + 4096 * td->num_of_write_bios;
+		if (is_random)
+			pseudo_rnd_num_of_bios(bio_seed, &num_bios);
+		else
+			/*
+			 * For the non-random case, give num_bios a value
+			 * between 1 and 5, to keep a small number of BIOs
+			 */
+			num_bios = (i%5)+1;
+
+		ret = test_iosched_add_wr_rd_test_req(is_err_expected, WRITE,
+				start_sec, num_bios, TEST_PATTERN_5A, NULL);
+
+		if (ret) {
+			test_pr_err("%s: failed to add a write request",
+				    __func__);
+			return ret;
+		}
+	}
+	return 0;
+}
+
+/*
+ * Prepare the write, read and flush requests for a generic packed commands
+ * testcase
+ */
+static int prepare_packed_requests(struct test_data *td, int is_err_expected,
+				   int num_requests, int is_random)
+{
+	int ret = 0;
+	struct mmc_queue *mq;
+	int max_packed_reqs;
+	struct request_queue *req_q;
+
+	if (!td) {
+		pr_err("%s: NULL td", __func__);
+		return -EINVAL;
+	}
+
+	req_q = td->req_q;
+
+	if (!req_q) {
+		pr_err("%s: NULL request queue", __func__);
+		return -EINVAL;
+	}
+
+	mq = req_q->queuedata;
+	if (!mq) {
+		test_pr_err("%s: NULL mq", __func__);
+		return -EINVAL;
+	}
+
+	max_packed_reqs = mq->card->ext_csd.max_packed_writes;
+
+	if (mbtd->random_test_seed <= 0) {
+		mbtd->random_test_seed =
+			(unsigned int)(get_jiffies_64() & 0xFFFF);
+		test_pr_info("%s: got seed from jiffies %d",
+			     __func__, mbtd->random_test_seed);
+	}
+
+	ret = prepare_request_add_write_reqs(td, num_requests, is_err_expected,
+					     is_random);
+	if (ret)
+		return ret;
+
+	/* Avoid memory corruption in upcoming stats set */
+	if (td->test_info.testcase == TEST_STOP_DUE_TO_THRESHOLD)
+		num_requests--;
+
+	memset((void *)mbtd->exp_packed_stats.pack_stop_reason, 0,
+		sizeof(mbtd->exp_packed_stats.pack_stop_reason));
+	memset(mbtd->exp_packed_stats.packing_events, 0,
+		(max_packed_reqs + 1) * sizeof(u32));
+	if (num_requests <= max_packed_reqs)
+		mbtd->exp_packed_stats.packing_events[num_requests] = 1;
+
+	switch (td->test_info.testcase) {
+	case TEST_STOP_DUE_TO_FLUSH:
+	case TEST_STOP_DUE_TO_FLUSH_AFTER_MAX_REQS:
+		ret = prepare_request_add_flush(td);
+		if (ret)
+			return ret;
+
+		mbtd->exp_packed_stats.pack_stop_reason[FLUSH_OR_DISCARD] = 1;
+		break;
+	case TEST_STOP_DUE_TO_READ:
+	case TEST_STOP_DUE_TO_READ_AFTER_MAX_REQS:
+		ret = prepare_request_add_read(td);
+		if (ret)
+			return ret;
+
+		mbtd->exp_packed_stats.pack_stop_reason[WRONG_DATA_DIR] = 1;
+		break;
+	case TEST_STOP_DUE_TO_THRESHOLD:
+		mbtd->exp_packed_stats.packing_events[num_requests] = 1;
+		mbtd->exp_packed_stats.packing_events[1] = 1;
+		mbtd->exp_packed_stats.pack_stop_reason[THRESHOLD] = 1;
+		mbtd->exp_packed_stats.pack_stop_reason[EMPTY_QUEUE] = 1;
+		break;
+	case TEST_STOP_DUE_TO_MAX_REQ_NUM:
+	case TEST_RET_PARTIAL_MAX_FAIL_IDX:
+		mbtd->exp_packed_stats.pack_stop_reason[THRESHOLD] = 1;
+		break;
+	default:
+		mbtd->exp_packed_stats.pack_stop_reason[EMPTY_QUEUE] = 1;
+	}
+	mbtd->num_requests = num_requests;
+
+	return 0;
+}
+
+/*
+ * Prepare the write, read and flush requests for the packing control
+ * testcases
+ */
+static int prepare_packed_control_tests_requests(struct test_data *td,
+			int is_err_expected, int num_requests, int is_random)
+{
+	int ret = 0;
+	struct mmc_queue *mq;
+	int max_packed_reqs;
+	int temp_num_req = num_requests;
+	struct request_queue *req_q;
+	int test_packed_trigger;
+	int num_packed_reqs;
+
+	if (!td) {
+		test_pr_err("%s: NULL td\n", __func__);
+		return -EINVAL;
+	}
+
+	req_q = td->req_q;
+
+	if (!req_q) {
+		test_pr_err("%s: NULL request queue\n", __func__);
+		return -EINVAL;
+	}
+
+	mq = req_q->queuedata;
+	if (!mq) {
+		test_pr_err("%s: NULL mq", __func__);
+		return -EINVAL;
+	}
+
+	max_packed_reqs = mq->card->ext_csd.max_packed_writes;
+	test_packed_trigger = mq->num_wr_reqs_to_start_packing;
+	num_packed_reqs = num_requests - test_packed_trigger;
+
+	if (mbtd->random_test_seed == 0) {
+		mbtd->random_test_seed =
+			(unsigned int)(get_jiffies_64() & 0xFFFF);
+		test_pr_info("%s: got seed from jiffies %d",
+			     __func__, mbtd->random_test_seed);
+	}
+
+	if (td->test_info.testcase ==
+			TEST_PACK_MIX_NO_PACKED_PACKED_NO_PACKED) {
+		temp_num_req = num_requests;
+		num_requests = test_packed_trigger - 1;
+	}
+
+	/* Verify that the packing is disabled before starting the test */
+	mq->wr_packing_enabled = false;
+	mq->num_of_potential_packed_wr_reqs = 0;
+
+	if (td->test_info.testcase == TEST_PACK_MIX_PACKED_NO_PACKED_PACKED) {
+		mq->num_of_potential_packed_wr_reqs = test_packed_trigger + 1;
+		mq->wr_packing_enabled = true;
+		num_requests = test_packed_trigger + 2;
+	}
+
+	ret = prepare_request_add_write_reqs(td, num_requests, is_err_expected,
+					     is_random);
+	if (ret)
+		goto exit;
+
+	if (td->test_info.testcase == TEST_PACK_MIX_NO_PACKED_PACKED_NO_PACKED)
+		num_requests = temp_num_req;
+
+	memset((void *)mbtd->exp_packed_stats.pack_stop_reason, 0,
+		sizeof(mbtd->exp_packed_stats.pack_stop_reason));
+	memset(mbtd->exp_packed_stats.packing_events, 0,
+		(max_packed_reqs + 1) * sizeof(u32));
+
+	switch (td->test_info.testcase) {
+	case TEST_PACKING_EXP_N_OVER_TRIGGER_FB_READ:
+	case TEST_PACKING_EXP_ONE_OVER_TRIGGER_FB_READ:
+		ret = prepare_request_add_read(td);
+		if (ret)
+			goto exit;
+
+		mbtd->exp_packed_stats.pack_stop_reason[WRONG_DATA_DIR] = 1;
+		mbtd->exp_packed_stats.packing_events[num_packed_reqs] = 1;
+		break;
+	case TEST_PACKING_EXP_N_OVER_TRIGGER_FLUSH_N:
+		ret = prepare_request_add_flush(td);
+		if (ret)
+			goto exit;
+
+		ret = prepare_request_add_write_reqs(td, num_packed_reqs,
+					     is_err_expected, is_random);
+		if (ret)
+			goto exit;
+
+		mbtd->exp_packed_stats.pack_stop_reason[EMPTY_QUEUE] = 1;
+		mbtd->exp_packed_stats.pack_stop_reason[FLUSH_OR_DISCARD] = 1;
+		mbtd->exp_packed_stats.packing_events[num_packed_reqs] = 2;
+		break;
+	case TEST_PACKING_NOT_EXP_TRIGGER_READ_TRIGGER:
+		ret = prepare_request_add_read(td);
+		if (ret)
+			goto exit;
+
+		ret = prepare_request_add_write_reqs(td, test_packed_trigger,
+						    is_err_expected, is_random);
+		if (ret)
+			goto exit;
+
+		mbtd->exp_packed_stats.packing_events[num_packed_reqs] = 1;
+		break;
+	case TEST_PACKING_NOT_EXP_TRIGGER_FLUSH_TRIGGER:
+		ret = prepare_request_add_flush(td);
+		if (ret)
+			goto exit;
+
+		ret = prepare_request_add_write_reqs(td, test_packed_trigger,
+						    is_err_expected, is_random);
+		if (ret)
+			goto exit;
+
+		mbtd->exp_packed_stats.packing_events[num_packed_reqs] = 1;
+		break;
+	case TEST_PACK_MIX_PACKED_NO_PACKED_PACKED:
+		ret = prepare_request_add_read(td);
+		if (ret)
+			goto exit;
+
+		ret = prepare_request_add_write_reqs(td, test_packed_trigger-1,
+						    is_err_expected, is_random);
+		if (ret)
+			goto exit;
+
+		ret = prepare_request_add_write_reqs(td, num_requests,
+						    is_err_expected, is_random);
+		if (ret)
+			goto exit;
+
+		mbtd->exp_packed_stats.packing_events[num_requests] = 1;
+		mbtd->exp_packed_stats.packing_events[num_requests-1] = 1;
+		mbtd->exp_packed_stats.pack_stop_reason[WRONG_DATA_DIR] = 1;
+		mbtd->exp_packed_stats.pack_stop_reason[EMPTY_QUEUE] = 1;
+		break;
+	case TEST_PACK_MIX_NO_PACKED_PACKED_NO_PACKED:
+		ret = prepare_request_add_read(td);
+		if (ret)
+			goto exit;
+
+		ret = prepare_request_add_write_reqs(td, num_requests,
+						    is_err_expected, is_random);
+		if (ret)
+			goto exit;
+
+		ret = prepare_request_add_read(td);
+		if (ret)
+			goto exit;
+
+		ret = prepare_request_add_write_reqs(td, test_packed_trigger-1,
+						    is_err_expected, is_random);
+		if (ret)
+			goto exit;
+
+		mbtd->exp_packed_stats.pack_stop_reason[WRONG_DATA_DIR] = 1;
+		mbtd->exp_packed_stats.packing_events[num_packed_reqs] = 1;
+		break;
+	case TEST_PACKING_NOT_EXP_LESS_THAN_TRIGGER_REQUESTS:
+	case TEST_PACKING_NOT_EXP_TRIGGER_REQUESTS:
+		mbtd->exp_packed_stats.packing_events[num_packed_reqs] = 1;
+		break;
+	default:
+		mbtd->exp_packed_stats.pack_stop_reason[EMPTY_QUEUE] = 1;
+		mbtd->exp_packed_stats.packing_events[num_packed_reqs] = 1;
+	}
+	mbtd->num_requests = num_requests;
+
+exit:
+	return ret;
+}
+
+/*
+ * Prepare requests for the TEST_RET_PARTIAL_FOLLOWED_BY_ABORT testcase.
+ * In this testcase we have mixed error expectations from different
+ * write requests, hence the special prepare function.
+ */
+static int prepare_partial_followed_by_abort(struct test_data *td,
+					      int num_requests)
+{
+	int i, start_address;
+	int is_err_expected = 0;
+	int ret = 0;
+	struct mmc_queue *mq = test_iosched_get_req_queue()->queuedata;
+	int max_packed_reqs;
+
+	if (!mq) {
+		test_pr_err("%s: NULL mq", __func__);
+		return -EINVAL;
+	}
+
+	max_packed_reqs = mq->card->ext_csd.max_packed_writes;
+
+	for (i = 1; i <= num_requests; i++) {
+		if (i > (num_requests / 2))
+			is_err_expected = 1;
+
+		start_address = td->start_sector + 4096 * td->num_of_write_bios;
+		ret = test_iosched_add_wr_rd_test_req(is_err_expected, WRITE,
+				start_address, (i % 5) + 1, TEST_PATTERN_5A,
+				NULL);
+		if (ret) {
+			test_pr_err("%s: failed to add a write request",
+				    __func__);
+			return ret;
+		}
+	}
+
+	memset((void *)&mbtd->exp_packed_stats.pack_stop_reason, 0,
+		sizeof(mbtd->exp_packed_stats.pack_stop_reason));
+	memset(mbtd->exp_packed_stats.packing_events, 0,
+		(max_packed_reqs + 1) * sizeof(u32));
+	mbtd->exp_packed_stats.packing_events[num_requests] = 1;
+	mbtd->exp_packed_stats.pack_stop_reason[EMPTY_QUEUE] = 1;
+
+	mbtd->num_requests = num_requests;
+
+	return ret;
+}
+
+/*
+ * Get number of write requests for current testcase. If random test mode was
+ * chosen, pseudo-randomly choose the number of requests, otherwise set to
+ * two less than the packing threshold.
+ */
+static int get_num_requests(struct test_data *td)
+{
+	int *seed = &mbtd->random_test_seed;
+	struct request_queue *req_q;
+	struct mmc_queue *mq;
+	int max_num_requests;
+	int num_requests;
+	int min_num_requests = 2;
+	int is_random = mbtd->is_random;
+	int max_for_double;
+	int test_packed_trigger;
+
+	req_q = test_iosched_get_req_queue();
+	if (req_q)
+		mq = req_q->queuedata;
+	else {
+		test_pr_err("%s: NULL request queue", __func__);
+		return 0;
+	}
+
+	if (!mq) {
+		test_pr_err("%s: NULL mq", __func__);
+		return -EINVAL;
+	}
+
+	max_num_requests = mq->card->ext_csd.max_packed_writes;
+	num_requests = max_num_requests - 2;
+	test_packed_trigger = mq->num_wr_reqs_to_start_packing;
+
+	/*
+	 * Here max_for_double is intended for packed control testcases
+	 * in which we issue many write requests. It's purpose is to prevent
+	 * exceeding max number of req_queue requests.
+	 */
+	max_for_double = max_num_requests - 10;
+
+	if (td->test_info.testcase ==
+				TEST_PACKING_NOT_EXP_LESS_THAN_TRIGGER_REQUESTS)
+		/* Don't expect packing, so issue up to trigger-1 reqs */
+		num_requests = test_packed_trigger - 1;
+
+	if (is_random) {
+		if (td->test_info.testcase ==
+		    TEST_RET_PARTIAL_FOLLOWED_BY_ABORT)
+			/*
+			 * Here we don't want num_requests to be less than 1
+			 * as a consequence of division by 2.
+			 */
+			min_num_requests = 3;
+
+		if (td->test_info.testcase ==
+				TEST_PACKING_NOT_EXP_LESS_THAN_TRIGGER_REQUESTS)
+			/* Don't expect packing, so issue up to trigger reqs */
+			max_num_requests = test_packed_trigger;
+
+		num_requests = pseudo_random_seed(seed, min_num_requests,
+						  max_num_requests - 1);
+	}
+
+	if (td->test_info.testcase ==
+				TEST_PACKING_NOT_EXP_LESS_THAN_TRIGGER_REQUESTS)
+		num_requests -= test_packed_trigger;
+
+	if (td->test_info.testcase == TEST_PACKING_EXP_N_OVER_TRIGGER_FLUSH_N)
+		num_requests =
+		num_requests > max_for_double ? max_for_double : num_requests;
+
+	if (mbtd->test_group == TEST_PACKING_CONTROL_GROUP)
+		num_requests += test_packed_trigger;
+
+	if (td->test_info.testcase == TEST_PACKING_NOT_EXP_TRIGGER_REQUESTS)
+		num_requests = test_packed_trigger;
+
+	return num_requests;
+}
+
+/*
+ * An implementation for the prepare_test_fn pointer in the test_info
+ * data structure. According to the testcase we add the right number of requests
+ * and decide if an error is expected or not.
+ */
+static int prepare_test(struct test_data *td)
+{
+	struct mmc_queue *mq = test_iosched_get_req_queue()->queuedata;
+	int max_num_requests;
+	int num_requests = 0;
+	int ret = 0;
+	int is_random = mbtd->is_random;
+	int test_packed_trigger = mq->num_wr_reqs_to_start_packing;
+
+	if (!mq) {
+		test_pr_err("%s: NULL mq", __func__);
+		return -EINVAL;
+	}
+
+	max_num_requests = mq->card->ext_csd.max_packed_writes;
+
+	if (is_random && mbtd->random_test_seed == 0) {
+		mbtd->random_test_seed =
+			(unsigned int)(get_jiffies_64() & 0xFFFF);
+		test_pr_info("%s: got seed from jiffies %d",
+			__func__, mbtd->random_test_seed);
+	}
+
+	num_requests = get_num_requests(td);
+
+	if (mbtd->test_group == TEST_SEND_INVALID_GROUP)
+		mq->packed_test_fn =
+				test_invalid_packed_cmd;
+
+	if (mbtd->test_group == TEST_ERR_CHECK_GROUP)
+		mq->err_check_fn = test_err_check;
+
+	switch (td->test_info.testcase) {
+	case TEST_STOP_DUE_TO_FLUSH:
+	case TEST_STOP_DUE_TO_READ:
+	case TEST_RET_PARTIAL_FOLLOWED_BY_SUCCESS:
+	case TEST_RET_PARTIAL_MULTIPLE_UNTIL_SUCCESS:
+	case TEST_STOP_DUE_TO_EMPTY_QUEUE:
+	case TEST_CMD23_PACKED_BIT_UNSET:
+		ret = prepare_packed_requests(td, 0, num_requests, is_random);
+		break;
+	case TEST_STOP_DUE_TO_FLUSH_AFTER_MAX_REQS:
+	case TEST_STOP_DUE_TO_READ_AFTER_MAX_REQS:
+		ret = prepare_packed_requests(td, 0, max_num_requests - 1,
+					      is_random);
+		break;
+	case TEST_RET_PARTIAL_FOLLOWED_BY_ABORT:
+		ret = prepare_partial_followed_by_abort(td, num_requests);
+		break;
+	case TEST_STOP_DUE_TO_MAX_REQ_NUM:
+	case TEST_RET_PARTIAL_MAX_FAIL_IDX:
+		ret = prepare_packed_requests(td, 0, max_num_requests,
+					      is_random);
+		break;
+	case TEST_STOP_DUE_TO_THRESHOLD:
+		ret = prepare_packed_requests(td, 0, max_num_requests + 1,
+					      is_random);
+		break;
+	case TEST_RET_ABORT:
+	case TEST_RET_RETRY:
+	case TEST_RET_CMD_ERR:
+	case TEST_RET_DATA_ERR:
+	case TEST_HDR_INVALID_VERSION:
+	case TEST_HDR_WRONG_WRITE_CODE:
+	case TEST_HDR_INVALID_RW_CODE:
+	case TEST_HDR_DIFFERENT_ADDRESSES:
+	case TEST_HDR_REQ_NUM_SMALLER_THAN_ACTUAL:
+	case TEST_HDR_REQ_NUM_LARGER_THAN_ACTUAL:
+	case TEST_CMD23_MAX_PACKED_WRITES:
+	case TEST_CMD23_ZERO_PACKED_WRITES:
+	case TEST_CMD23_REL_WR_BIT_SET:
+	case TEST_CMD23_BITS_16TO29_SET:
+	case TEST_CMD23_HDR_BLK_NOT_IN_COUNT:
+	case TEST_HDR_CMD23_PACKED_BIT_SET:
+		ret = prepare_packed_requests(td, 1, num_requests, is_random);
+		break;
+	case TEST_PACKING_EXP_N_OVER_TRIGGER:
+	case TEST_PACKING_EXP_N_OVER_TRIGGER_FB_READ:
+	case TEST_PACKING_NOT_EXP_TRIGGER_REQUESTS:
+	case TEST_PACKING_NOT_EXP_LESS_THAN_TRIGGER_REQUESTS:
+	case TEST_PACK_MIX_PACKED_NO_PACKED_PACKED:
+	case TEST_PACK_MIX_NO_PACKED_PACKED_NO_PACKED:
+		ret = prepare_packed_control_tests_requests(td, 0, num_requests,
+			is_random);
+		break;
+	case TEST_PACKING_EXP_THRESHOLD_OVER_TRIGGER:
+		ret = prepare_packed_control_tests_requests(td, 0,
+			max_num_requests, is_random);
+		break;
+	case TEST_PACKING_EXP_ONE_OVER_TRIGGER_FB_READ:
+		ret = prepare_packed_control_tests_requests(td, 0,
+			test_packed_trigger + 1,
+					is_random);
+		break;
+	case TEST_PACKING_EXP_N_OVER_TRIGGER_FLUSH_N:
+		ret = prepare_packed_control_tests_requests(td, 0, num_requests,
+			is_random);
+		break;
+	case TEST_PACKING_NOT_EXP_TRIGGER_READ_TRIGGER:
+	case TEST_PACKING_NOT_EXP_TRIGGER_FLUSH_TRIGGER:
+		ret = prepare_packed_control_tests_requests(td, 0,
+			test_packed_trigger, is_random);
+		break;
+	default:
+		test_pr_info("%s: Invalid test case...", __func__);
+		return -EINVAL;
+	}
+
+	return ret;
+}
+
+static int run_packed_test(struct test_data *td)
+{
+	struct mmc_queue *mq;
+	struct request_queue *req_q;
+
+	if (!td) {
+		pr_err("%s: NULL td", __func__);
+		return -EINVAL;
+	}
+
+	req_q = td->req_q;
+
+	if (!req_q) {
+		pr_err("%s: NULL request queue", __func__);
+		return -EINVAL;
+	}
+
+	mq = req_q->queuedata;
+	if (!mq) {
+		test_pr_err("%s: NULL mq", __func__);
+		return -EINVAL;
+	}
+	mmc_blk_init_packed_statistics(mq->card);
+
+	if (td->test_info.testcase != TEST_PACK_MIX_PACKED_NO_PACKED_PACKED) {
+		/*
+		 * Verify that the packing is disabled before starting the
+		 * test
+		 */
+		mq->wr_packing_enabled = false;
+		mq->num_of_potential_packed_wr_reqs = 0;
+	}
+
+	__blk_run_queue(td->req_q);
+
+	return 0;
+}
+
+/*
+ * An implementation for the post_test_fn in the test_info data structure.
+ * In our case we just reset the function pointers in the mmc_queue in order for
+ * the FS to be able to dispatch it's requests correctly after the test is
+ * finished.
+ */
+static int post_test(struct test_data *td)
+{
+	struct mmc_queue *mq;
+
+	if (!td)
+		return -EINVAL;
+
+	mq = td->req_q->queuedata;
+
+	if (!mq) {
+		test_pr_err("%s: NULL mq", __func__);
+		return -EINVAL;
+	}
+
+	mq->packed_test_fn = NULL;
+	mq->err_check_fn = NULL;
+
+	return 0;
+}
+
+/*
+ * This function checks, based on the current test's test_group, that the
+ * packed commands capability and control are set right. In addition, we check
+ * if the card supports the packed command feature.
+ */
+static int validate_packed_commands_settings(void)
+{
+	struct request_queue *req_q;
+	struct mmc_queue *mq;
+	int max_num_requests;
+	struct mmc_host *host;
+
+	req_q = test_iosched_get_req_queue();
+	if (!req_q) {
+		test_pr_err("%s: test_iosched_get_req_queue failed", __func__);
+		test_iosched_set_test_result(TEST_FAILED);
+		return -EINVAL;
+	}
+
+	mq = req_q->queuedata;
+	if (!mq) {
+		test_pr_err("%s: NULL mq", __func__);
+		return -EINVAL;
+	}
+
+	max_num_requests = mq->card->ext_csd.max_packed_writes;
+	host = mq->card->host;
+
+	if (!(host->caps2 && MMC_CAP2_PACKED_WR)) {
+		test_pr_err("%s: Packed Write capability disabled, exit test",
+			    __func__);
+		test_iosched_set_test_result(TEST_NOT_SUPPORTED);
+		return -EINVAL;
+	}
+
+	if (max_num_requests == 0) {
+		test_pr_err(
+		"%s: no write packing support, ext_csd.max_packed_writes=%d",
+		__func__, mq->card->ext_csd.max_packed_writes);
+		test_iosched_set_test_result(TEST_NOT_SUPPORTED);
+		return -EINVAL;
+	}
+
+	test_pr_info("%s: max number of packed requests supported is %d ",
+		     __func__, max_num_requests);
+
+	switch (mbtd->test_group) {
+	case TEST_SEND_WRITE_PACKING_GROUP:
+	case TEST_ERR_CHECK_GROUP:
+	case TEST_SEND_INVALID_GROUP:
+		/* disable the packing control */
+		host->caps2 &= ~MMC_CAP2_PACKED_WR_CONTROL;
+		break;
+	case TEST_PACKING_CONTROL_GROUP:
+		host->caps2 |=  MMC_CAP2_PACKED_WR_CONTROL;
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static bool message_repeat;
+static int test_open(struct inode *inode, struct file *file)
+{
+	file->private_data = inode->i_private;
+	message_repeat = 1;
+	return 0;
+}
+
+/* send_packing TEST */
+static ssize_t send_write_packing_test_write(struct file *file,
+				const char __user *buf,
+				size_t count,
+				loff_t *ppos)
+{
+	int ret = 0;
+	int i = 0;
+	int number = -1;
+	int j = 0;
+
+	test_pr_info("%s: -- send_write_packing TEST --", __func__);
+
+	sscanf(buf, "%d", &number);
+
+	if (number <= 0)
+		number = 1;
+
+
+	mbtd->test_group = TEST_SEND_WRITE_PACKING_GROUP;
+
+	if (validate_packed_commands_settings())
+		return count;
+
+	if (mbtd->random_test_seed > 0)
+		test_pr_info("%s: Test seed: %d", __func__,
+			      mbtd->random_test_seed);
+
+	memset(&mbtd->test_info, 0, sizeof(struct test_info));
+
+	mbtd->test_info.data = mbtd;
+	mbtd->test_info.prepare_test_fn = prepare_test;
+	mbtd->test_info.run_test_fn = run_packed_test;
+	mbtd->test_info.check_test_result_fn = check_wr_packing_statistics;
+	mbtd->test_info.get_test_case_str_fn = get_test_case_str;
+	mbtd->test_info.post_test_fn = post_test;
+
+	for (i = 0; i < number; ++i) {
+		test_pr_info("%s: Cycle # %d / %d", __func__, i+1, number);
+		test_pr_info("%s: ====================", __func__);
+
+		for (j = SEND_WRITE_PACKING_MIN_TESTCASE;
+		      j <= SEND_WRITE_PACKING_MAX_TESTCASE; j++) {
+
+			mbtd->test_info.testcase = j;
+			mbtd->is_random = RANDOM_TEST;
+			ret = test_iosched_start_test(&mbtd->test_info);
+			if (ret)
+				break;
+			/* Allow FS requests to be dispatched */
+			msleep(1000);
+			mbtd->test_info.testcase = j;
+			mbtd->is_random = NON_RANDOM_TEST;
+			ret = test_iosched_start_test(&mbtd->test_info);
+			if (ret)
+				break;
+			/* Allow FS requests to be dispatched */
+			msleep(1000);
+		}
+	}
+
+	test_pr_info("%s: Completed all the test cases.", __func__);
+
+	return count;
+}
+
+static ssize_t send_write_packing_test_read(struct file *file,
+			       char __user *buffer,
+			       size_t count,
+			       loff_t *offset)
+{
+	memset((void *)buffer, 0, count);
+
+	snprintf(buffer, count,
+		 "\nsend_write_packing_test\n"
+		 "=========\n"
+		 "Description:\n"
+		 "This test checks the following scenarios\n"
+		 "- Pack due to FLUSH message\n"
+		 "- Pack due to FLUSH after threshold writes\n"
+		 "- Pack due to READ message\n"
+		 "- Pack due to READ after threshold writes\n"
+		 "- Pack due to empty queue\n"
+		 "- Pack due to threshold writes\n"
+		 "- Pack due to one over threshold writes\n");
+
+	if (message_repeat == 1) {
+		message_repeat = 0;
+		return strnlen(buffer, count);
+	} else {
+		return 0;
+	}
+}
+
+const struct file_operations send_write_packing_test_ops = {
+	.open = test_open,
+	.write = send_write_packing_test_write,
+	.read = send_write_packing_test_read,
+};
+
+/* err_check TEST */
+static ssize_t err_check_test_write(struct file *file,
+				const char __user *buf,
+				size_t count,
+				loff_t *ppos)
+{
+	int ret = 0;
+	int i = 0;
+	int number = -1;
+	int j = 0;
+
+	test_pr_info("%s: -- err_check TEST --", __func__);
+
+	sscanf(buf, "%d", &number);
+
+	if (number <= 0)
+		number = 1;
+
+	mbtd->test_group = TEST_ERR_CHECK_GROUP;
+
+	if (validate_packed_commands_settings())
+		return count;
+
+	if (mbtd->random_test_seed > 0)
+		test_pr_info("%s: Test seed: %d", __func__,
+			      mbtd->random_test_seed);
+
+	memset(&mbtd->test_info, 0, sizeof(struct test_info));
+
+	mbtd->test_info.data = mbtd;
+	mbtd->test_info.prepare_test_fn = prepare_test;
+	mbtd->test_info.run_test_fn = run_packed_test;
+	mbtd->test_info.check_test_result_fn = check_wr_packing_statistics;
+	mbtd->test_info.get_test_case_str_fn = get_test_case_str;
+	mbtd->test_info.post_test_fn = post_test;
+
+	for (i = 0; i < number; ++i) {
+		test_pr_info("%s: Cycle # %d / %d", __func__, i+1, number);
+		test_pr_info("%s: ====================", __func__);
+
+		for (j = ERR_CHECK_MIN_TESTCASE;
+					j <= ERR_CHECK_MAX_TESTCASE ; j++) {
+			mbtd->test_info.testcase = j;
+			mbtd->is_random = RANDOM_TEST;
+			ret = test_iosched_start_test(&mbtd->test_info);
+			if (ret)
+				break;
+			/* Allow FS requests to be dispatched */
+			msleep(1000);
+			mbtd->test_info.testcase = j;
+			mbtd->is_random = NON_RANDOM_TEST;
+			ret = test_iosched_start_test(&mbtd->test_info);
+			if (ret)
+				break;
+			/* Allow FS requests to be dispatched */
+			msleep(1000);
+		}
+	}
+
+	test_pr_info("%s: Completed all the test cases.", __func__);
+
+	return count;
+}
+
+static ssize_t err_check_test_read(struct file *file,
+			       char __user *buffer,
+			       size_t count,
+			       loff_t *offset)
+{
+	memset((void *)buffer, 0, count);
+
+	snprintf(buffer, count,
+		 "\nerr_check_TEST\n"
+		 "=========\n"
+		 "Description:\n"
+		 "This test checks the following scenarios\n"
+		 "- Return ABORT\n"
+		 "- Return PARTIAL followed by success\n"
+		 "- Return PARTIAL followed by abort\n"
+		 "- Return PARTIAL multiple times until success\n"
+		 "- Return PARTIAL with fail index = threshold\n"
+		 "- Return RETRY\n"
+		 "- Return CMD_ERR\n"
+		 "- Return DATA_ERR\n");
+
+	if (message_repeat == 1) {
+		message_repeat = 0;
+		return strnlen(buffer, count);
+	} else {
+		return 0;
+	}
+}
+
+const struct file_operations err_check_test_ops = {
+	.open = test_open,
+	.write = err_check_test_write,
+	.read = err_check_test_read,
+};
+
+/* send_invalid_packed TEST */
+static ssize_t send_invalid_packed_test_write(struct file *file,
+				const char __user *buf,
+				size_t count,
+				loff_t *ppos)
+{
+	int ret = 0;
+	int i = 0;
+	int number = -1;
+	int j = 0;
+	int num_of_failures = 0;
+
+	test_pr_info("%s: -- send_invalid_packed TEST --", __func__);
+
+	sscanf(buf, "%d", &number);
+
+	if (number <= 0)
+		number = 1;
+
+	mbtd->test_group = TEST_SEND_INVALID_GROUP;
+
+	if (validate_packed_commands_settings())
+		return count;
+
+	if (mbtd->random_test_seed > 0)
+		test_pr_info("%s: Test seed: %d", __func__,
+			      mbtd->random_test_seed);
+
+	memset(&mbtd->test_info, 0, sizeof(struct test_info));
+
+	mbtd->test_info.data = mbtd;
+	mbtd->test_info.prepare_test_fn = prepare_test;
+	mbtd->test_info.run_test_fn = run_packed_test;
+	mbtd->test_info.check_test_result_fn = check_wr_packing_statistics;
+	mbtd->test_info.get_test_case_str_fn = get_test_case_str;
+	mbtd->test_info.post_test_fn = post_test;
+
+	for (i = 0; i < number; ++i) {
+		test_pr_info("%s: Cycle # %d / %d", __func__, i+1, number);
+		test_pr_info("%s: ====================", __func__);
+
+		for (j = INVALID_CMD_MIN_TESTCASE;
+				j <= INVALID_CMD_MAX_TESTCASE ; j++) {
+
+			mbtd->test_info.testcase = j;
+			mbtd->is_random = RANDOM_TEST;
+			ret = test_iosched_start_test(&mbtd->test_info);
+			if (ret)
+				num_of_failures++;
+			/* Allow FS requests to be dispatched */
+			msleep(1000);
+
+			mbtd->test_info.testcase = j;
+			mbtd->is_random = NON_RANDOM_TEST;
+			ret = test_iosched_start_test(&mbtd->test_info);
+			if (ret)
+				num_of_failures++;
+			/* Allow FS requests to be dispatched */
+			msleep(1000);
+		}
+	}
+
+	test_pr_info("%s: Completed all the test cases.", __func__);
+
+	if (num_of_failures > 0) {
+		test_iosched_set_test_result(TEST_FAILED);
+		test_pr_err(
+			"There were %d failures during the test, TEST FAILED",
+			num_of_failures);
+	}
+	return count;
+}
+
+static ssize_t send_invalid_packed_test_read(struct file *file,
+			       char __user *buffer,
+			       size_t count,
+			       loff_t *offset)
+{
+	memset((void *)buffer, 0, count);
+
+	snprintf(buffer, count,
+		 "\nsend_invalid_packed_TEST\n"
+		 "=========\n"
+		 "Description:\n"
+		 "This test checks the following scenarios\n"
+		 "- Send an invalid header version\n"
+		 "- Send the wrong write code\n"
+		 "- Send an invalid R/W code\n"
+		 "- Send wrong start address in header\n"
+		 "- Send header with block_count smaller than actual\n"
+		 "- Send header with block_count larger than actual\n"
+		 "- Send header CMD23 packed bit set\n"
+		 "- Send CMD23 with block count over threshold\n"
+		 "- Send CMD23 with block_count equals zero\n"
+		 "- Send CMD23 packed bit unset\n"
+		 "- Send CMD23 reliable write bit set\n"
+		 "- Send CMD23 bits [16-29] set\n"
+		 "- Send CMD23 header block not in block_count\n");
+
+	if (message_repeat == 1) {
+		message_repeat = 0;
+		return strnlen(buffer, count);
+	} else {
+		return 0;
+	}
+}
+
+const struct file_operations send_invalid_packed_test_ops = {
+	.open = test_open,
+	.write = send_invalid_packed_test_write,
+	.read = send_invalid_packed_test_read,
+};
+
+/* packing_control TEST */
+static ssize_t write_packing_control_test_write(struct file *file,
+				const char __user *buf,
+				size_t count,
+				loff_t *ppos)
+{
+	int ret = 0;
+	int i = 0;
+	int number = -1;
+	int j = 0;
+	struct mmc_queue *mq = test_iosched_get_req_queue()->queuedata;
+	int max_num_requests = mq->card->ext_csd.max_packed_writes;
+	int test_successful = 1;
+
+	test_pr_info("%s: -- write_packing_control TEST --", __func__);
+
+	sscanf(buf, "%d", &number);
+
+	if (number <= 0)
+		number = 1;
+
+	test_pr_info("%s: max_num_requests = %d ", __func__,
+			max_num_requests);
+
+	memset(&mbtd->test_info, 0, sizeof(struct test_info));
+	mbtd->test_group = TEST_PACKING_CONTROL_GROUP;
+
+	if (validate_packed_commands_settings())
+		return count;
+
+	mbtd->test_info.data = mbtd;
+	mbtd->test_info.prepare_test_fn = prepare_test;
+	mbtd->test_info.run_test_fn = run_packed_test;
+	mbtd->test_info.check_test_result_fn = check_wr_packing_statistics;
+	mbtd->test_info.get_test_case_str_fn = get_test_case_str;
+
+	for (i = 0; i < number; ++i) {
+		test_pr_info("%s: Cycle # %d / %d", __func__, i+1, number);
+		test_pr_info("%s: ====================", __func__);
+
+		for (j = PACKING_CONTROL_MIN_TESTCASE;
+				j <= PACKING_CONTROL_MAX_TESTCASE; j++) {
+
+			test_successful = 1;
+			mbtd->test_info.testcase = j;
+			mbtd->is_random = RANDOM_TEST;
+			ret = test_iosched_start_test(&mbtd->test_info);
+			if (ret) {
+				test_successful = 0;
+				break;
+			}
+			/* Allow FS requests to be dispatched */
+			msleep(1000);
+
+			mbtd->test_info.testcase = j;
+			mbtd->is_random = NON_RANDOM_TEST;
+			ret = test_iosched_start_test(&mbtd->test_info);
+			if (ret) {
+				test_successful = 0;
+				break;
+			}
+			/* Allow FS requests to be dispatched */
+			msleep(1000);
+		}
+
+		if (!test_successful)
+			break;
+	}
+
+	test_pr_info("%s: Completed all the test cases.", __func__);
+
+	return count;
+}
+
+static ssize_t write_packing_control_test_read(struct file *file,
+			       char __user *buffer,
+			       size_t count,
+			       loff_t *offset)
+{
+	memset((void *)buffer, 0, count);
+
+	snprintf(buffer, count,
+		 "\nwrite_packing_control_test\n"
+		 "=========\n"
+		 "Description:\n"
+		 "This test checks the following scenarios\n"
+		 "- Packing expected - one over trigger\n"
+		 "- Packing expected - N over trigger\n"
+		 "- Packing expected - N over trigger followed by read\n"
+		 "- Packing expected - N over trigger followed by flush\n"
+		 "- Packing expected - threshold over trigger FB by flush\n"
+		 "- Packing not expected - less than trigger\n"
+		 "- Packing not expected - trigger requests\n"
+		 "- Packing not expected - trigger, read, trigger\n"
+		 "- Mixed state - packing -> no packing -> packing\n"
+		 "- Mixed state - no packing -> packing -> no packing\n");
+
+	if (message_repeat == 1) {
+		message_repeat = 0;
+		return strnlen(buffer, count);
+	} else {
+		return 0;
+	}
+}
+
+const struct file_operations write_packing_control_test_ops = {
+	.open = test_open,
+	.write = write_packing_control_test_write,
+	.read = write_packing_control_test_read,
+};
+
+static void mmc_block_test_debugfs_cleanup(void)
+{
+	debugfs_remove(mbtd->debug.random_test_seed);
+	debugfs_remove(mbtd->debug.send_write_packing_test);
+	debugfs_remove(mbtd->debug.err_check_test);
+	debugfs_remove(mbtd->debug.send_invalid_packed_test);
+	debugfs_remove(mbtd->debug.packing_control_test);
+}
+
+static int mmc_block_test_debugfs_init(void)
+{
+	struct dentry *utils_root, *tests_root;
+
+	utils_root = test_iosched_get_debugfs_utils_root();
+	tests_root = test_iosched_get_debugfs_tests_root();
+
+	if (!utils_root || !tests_root)
+		return -EINVAL;
+
+	mbtd->debug.random_test_seed = debugfs_create_u32(
+					"random_test_seed",
+					S_IRUGO | S_IWUGO,
+					utils_root,
+					&mbtd->random_test_seed);
+
+	if (!mbtd->debug.random_test_seed)
+		goto err_nomem;
+
+	mbtd->debug.send_write_packing_test =
+		debugfs_create_file("send_write_packing_test",
+				    S_IRUGO | S_IWUGO,
+				    tests_root,
+				    NULL,
+				    &send_write_packing_test_ops);
+
+	if (!mbtd->debug.send_write_packing_test)
+		goto err_nomem;
+
+	mbtd->debug.err_check_test =
+		debugfs_create_file("err_check_test",
+				    S_IRUGO | S_IWUGO,
+				    tests_root,
+				    NULL,
+				    &err_check_test_ops);
+
+	if (!mbtd->debug.err_check_test)
+		goto err_nomem;
+
+	mbtd->debug.send_invalid_packed_test =
+		debugfs_create_file("send_invalid_packed_test",
+				    S_IRUGO | S_IWUGO,
+				    tests_root,
+				    NULL,
+				    &send_invalid_packed_test_ops);
+
+	if (!mbtd->debug.send_invalid_packed_test)
+		goto err_nomem;
+
+	mbtd->debug.packing_control_test = debugfs_create_file(
+					"packing_control_test",
+					S_IRUGO | S_IWUGO,
+					tests_root,
+					NULL,
+					&write_packing_control_test_ops);
+
+	if (!mbtd->debug.packing_control_test)
+		goto err_nomem;
+
+	return 0;
+
+err_nomem:
+	mmc_block_test_debugfs_cleanup();
+	return -ENOMEM;
+}
+
+static void mmc_block_test_probe(void)
+{
+	struct request_queue *q = test_iosched_get_req_queue();
+	struct mmc_queue *mq;
+	int max_packed_reqs;
+
+	if (!q) {
+		test_pr_err("%s: NULL request queue", __func__);
+		return;
+	}
+
+	mq = q->queuedata;
+	if (!mq) {
+		test_pr_err("%s: NULL mq", __func__);
+		return;
+	}
+
+	max_packed_reqs = mq->card->ext_csd.max_packed_writes;
+	mbtd->exp_packed_stats.packing_events =
+			kzalloc((max_packed_reqs + 1) *
+				sizeof(*mbtd->exp_packed_stats.packing_events),
+				GFP_KERNEL);
+
+	mmc_block_test_debugfs_init();
+}
+
+static void mmc_block_test_remove(void)
+{
+	mmc_block_test_debugfs_cleanup();
+}
+
+static int __init mmc_block_test_init(void)
+{
+	mbtd = kzalloc(sizeof(struct mmc_block_test_data), GFP_KERNEL);
+	if (!mbtd) {
+		test_pr_err("%s: failed to allocate mmc_block_test_data",
+			    __func__);
+		return -ENODEV;
+	}
+
+	mbtd->bdt.init_fn = mmc_block_test_probe;
+	mbtd->bdt.exit_fn = mmc_block_test_remove;
+	INIT_LIST_HEAD(&mbtd->bdt.list);
+	test_iosched_register(&mbtd->bdt);
+
+	return 0;
+}
+
+static void __exit mmc_block_test_exit(void)
+{
+	test_iosched_unregister(&mbtd->bdt);
+	kfree(mbtd);
+}
+
+module_init(mmc_block_test_init);
+module_exit(mmc_block_test_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_DESCRIPTION("MMC block test");
diff --git a/drivers/mmc/card/mmc_test.c b/drivers/mmc/card/mmc_test.c
index df382be..b9c8824 100644
--- a/drivers/mmc/card/mmc_test.c
+++ b/drivers/mmc/card/mmc_test.c
@@ -3115,7 +3115,8 @@
 	}
 
 #ifdef CONFIG_HIGHMEM
-	__free_pages(test->highmem, BUFFER_ORDER);
+	if (test->highmem)
+		__free_pages(test->highmem, BUFFER_ORDER);
 #endif
 	kfree(test->buffer);
 	kfree(test);
diff --git a/drivers/mmc/card/queue.c b/drivers/mmc/card/queue.c
index 1810f76..b2d11b5 100644
--- a/drivers/mmc/card/queue.c
+++ b/drivers/mmc/card/queue.c
@@ -16,6 +16,8 @@
 #include <linux/kthread.h>
 #include <linux/scatterlist.h>
 #include <linux/dma-mapping.h>
+#include <linux/bitops.h>
+#include <linux/delay.h>
 
 #include <linux/mmc/card.h>
 #include <linux/mmc/host.h>
@@ -27,6 +29,13 @@
 #define MMC_QUEUE_BOUNCESZ	65536
 
 /*
+ * Based on benchmark tests the default num of requests to trigger the write
+ * packing was determined, to keep the read latency as low as possible and
+ * manage to keep the high write throughput.
+ */
+#define DEFAULT_NUM_REQS_TO_START_PACK 17
+
+/*
  * Prepare a MMC request. This just filters out odd stuff.
  */
 static int mmc_prep_request(struct request_queue *q, struct request *req)
@@ -50,10 +59,95 @@
 	return BLKPREP_OK;
 }
 
+static struct request *mmc_peek_request(struct mmc_queue *mq)
+{
+	struct request_queue *q = mq->queue;
+	mq->cmdq_req_peeked = NULL;
+
+	spin_lock_irq(q->queue_lock);
+	if (!blk_queue_stopped(q))
+		mq->cmdq_req_peeked = blk_peek_request(q);
+	spin_unlock_irq(q->queue_lock);
+
+	return mq->cmdq_req_peeked;
+}
+
+static bool mmc_check_blk_queue_start_tag(struct request_queue *q,
+					  struct request *req)
+{
+	int ret;
+
+	spin_lock_irq(q->queue_lock);
+	ret = blk_queue_start_tag(q, req);
+	spin_unlock_irq(q->queue_lock);
+
+	return !!ret;
+}
+
+static inline void mmc_cmdq_ready_wait(struct mmc_host *host,
+					struct mmc_queue *mq)
+{
+	struct mmc_cmdq_context_info *ctx = &host->cmdq_ctx;
+	struct request_queue *q = mq->queue;
+
+	/*
+	 * Wait until all of the following conditions are true:
+	 * 1. There is a request pending in the block layer queue
+	 *    to be processed.
+	 * 2. If the peeked request is flush/discard then there shouldn't
+	 *    be any other direct command active.
+	 * 3. cmdq state should be unhalted.
+	 * 4. cmdq state shouldn't be in error state.
+	 * 5. free tag available to process the new request.
+	 */
+	wait_event(ctx->wait, mmc_peek_request(mq) &&
+		!(((req_op(mq->cmdq_req_peeked) == REQ_OP_FLUSH) ||
+		   (req_op(mq->cmdq_req_peeked) == REQ_OP_DISCARD))
+		  && test_bit(CMDQ_STATE_DCMD_ACTIVE, &ctx->curr_state))
+		&& !(!host->card->part_curr && !mmc_card_suspended(host->card)
+		     && mmc_host_halt(host))
+		&& !(!host->card->part_curr && mmc_host_cq_disable(host) &&
+			!mmc_card_suspended(host->card))
+		&& !test_bit(CMDQ_STATE_ERR, &ctx->curr_state)
+		&& !mmc_check_blk_queue_start_tag(q, mq->cmdq_req_peeked));
+}
+
+static int mmc_cmdq_thread(void *d)
+{
+	struct mmc_queue *mq = d;
+	struct mmc_card *card = mq->card;
+
+	struct mmc_host *host = card->host;
+
+	current->flags |= PF_MEMALLOC;
+	if (card->host->wakeup_on_idle)
+		set_wake_up_idle(true);
+
+	while (1) {
+		int ret = 0;
+
+		mmc_cmdq_ready_wait(host, mq);
+
+		ret = mq->cmdq_issue_fn(mq, mq->cmdq_req_peeked);
+		/*
+		 * Don't requeue if issue_fn fails, just bug on.
+		 * We don't expect failure here and there is no recovery other
+		 * than fixing the actual issue if there is any.
+		 * Also we end the request if there is a partition switch error,
+		 * so we should not requeue the request here.
+		 */
+		if (ret)
+			BUG_ON(1);
+	} /* loop */
+
+	return 0;
+}
+
 static int mmc_queue_thread(void *d)
 {
 	struct mmc_queue *mq = d;
 	struct request_queue *q = mq->queue;
+	struct mmc_card *card = mq->card;
 	struct sched_param scheduler_params = {0};
 
 	scheduler_params.sched_priority = 1;
@@ -61,6 +155,8 @@
 	sched_setscheduler(current, SCHED_FIFO, &scheduler_params);
 
 	current->flags |= PF_MEMALLOC;
+	if (card->host->wakeup_on_idle)
+		set_wake_up_idle(true);
 
 	down(&mq->thread_sem);
 	do {
@@ -78,8 +174,8 @@
 			set_current_state(TASK_RUNNING);
 			mmc_blk_issue_rq(mq, req);
 			cond_resched();
-			if (mq->flags & MMC_QUEUE_NEW_REQUEST) {
-				mq->flags &= ~MMC_QUEUE_NEW_REQUEST;
+			if (test_bit(MMC_QUEUE_NEW_REQUEST, &mq->flags)) {
+				clear_bit(MMC_QUEUE_NEW_REQUEST, &mq->flags);
 				continue; /* fetch again */
 			}
 
@@ -111,6 +207,13 @@
 	return 0;
 }
 
+static void mmc_cmdq_dispatch_req(struct request_queue *q)
+{
+	struct mmc_queue *mq = q->queuedata;
+
+	wake_up(&mq->card->host->cmdq_ctx.wait);
+}
+
 /*
  * Generic MMC request handler.  This is called for any queue on a
  * particular host.  When the host is not busy, we look for a request
@@ -186,6 +289,32 @@
 }
 
 /**
+ * mmc_blk_cmdq_setup_queue
+ * @mq: mmc queue
+ * @card: card to attach to this queue
+ *
+ * Setup queue for CMDQ supporting MMC card
+ */
+void mmc_cmdq_setup_queue(struct mmc_queue *mq, struct mmc_card *card)
+{
+	u64 limit = BLK_BOUNCE_HIGH;
+	struct mmc_host *host = card->host;
+
+	if (mmc_dev(host)->dma_mask && *mmc_dev(host)->dma_mask)
+		limit = *mmc_dev(host)->dma_mask;
+
+	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mq->queue);
+	if (mmc_can_erase(card))
+		mmc_queue_setup_discard(mq->queue, card);
+
+	blk_queue_bounce_limit(mq->queue, limit);
+	blk_queue_max_hw_sectors(mq->queue, min(host->max_blk_count,
+						host->max_req_size / 512));
+	blk_queue_max_segment_size(mq->queue, host->max_seg_size);
+	blk_queue_max_segments(mq->queue, host->max_segs);
+}
+
+/**
  * mmc_init_queue - initialise a queue structure.
  * @mq: mmc queue
  * @card: mmc card to attach this queue
@@ -195,7 +324,7 @@
  * Initialise a MMC card request queue.
  */
 int mmc_init_queue(struct mmc_queue *mq, struct mmc_card *card,
-		   spinlock_t *lock, const char *subname)
+		   spinlock_t *lock, const char *subname, int area_type)
 {
 	struct mmc_host *host = card->host;
 	u64 limit = BLK_BOUNCE_HIGH;
@@ -207,6 +336,37 @@
 		limit = (u64)dma_max_pfn(mmc_dev(host)) << PAGE_SHIFT;
 
 	mq->card = card;
+	if (card->ext_csd.cmdq_support &&
+	    (area_type == MMC_BLK_DATA_AREA_MAIN)) {
+		mq->queue = blk_init_queue(mmc_cmdq_dispatch_req, lock);
+		if (!mq->queue)
+			return -ENOMEM;
+		mmc_cmdq_setup_queue(mq, card);
+		ret = mmc_cmdq_init(mq, card);
+		if (ret) {
+			pr_err("%s: %d: cmdq: unable to set-up\n",
+			       mmc_hostname(card->host), ret);
+			blk_cleanup_queue(mq->queue);
+		} else {
+			sema_init(&mq->thread_sem, 1);
+			/* hook for pm qos cmdq init */
+			if (card->host->cmdq_ops->init)
+				card->host->cmdq_ops->init(card->host);
+			mq->queue->queuedata = mq;
+			mq->thread = kthread_run(mmc_cmdq_thread, mq,
+						 "mmc-cmdqd/%d%s",
+						 host->index,
+						 subname ? subname : "");
+			if (IS_ERR(mq->thread)) {
+				pr_err("%s: %d: cmdq: failed to start mmc-cmdqd thread\n",
+						mmc_hostname(card->host), ret);
+				ret = PTR_ERR(mq->thread);
+			}
+
+			return ret;
+		}
+	}
+
 	mq->queue = blk_init_queue(mmc_request_fn, lock);
 	if (!mq->queue)
 		return -ENOMEM;
@@ -214,6 +374,9 @@
 	mq->mqrq_cur = mqrq_cur;
 	mq->mqrq_prev = mqrq_prev;
 	mq->queue->queuedata = mq;
+	mq->num_wr_reqs_to_start_packing =
+		min_t(int, (int)card->ext_csd.max_packed_writes,
+		     DEFAULT_NUM_REQS_TO_START_PACK);
 
 	blk_queue_prep_rq(mq->queue, mmc_prep_request);
 	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mq->queue);
@@ -279,24 +442,49 @@
 #endif
 
 	if (!mqrq_cur->bounce_buf && !mqrq_prev->bounce_buf) {
+		unsigned int max_segs = host->max_segs;
+
 		blk_queue_bounce_limit(mq->queue, limit);
 		blk_queue_max_hw_sectors(mq->queue,
 			min(host->max_blk_count, host->max_req_size / 512));
-		blk_queue_max_segments(mq->queue, host->max_segs);
 		blk_queue_max_segment_size(mq->queue, host->max_seg_size);
+retry:
+		blk_queue_max_segments(mq->queue, host->max_segs);
 
 		mqrq_cur->sg = mmc_alloc_sg(host->max_segs, &ret);
-		if (ret)
+		if (ret == -ENOMEM)
+			goto cur_sg_alloc_failed;
+		else if (ret)
 			goto cleanup_queue;
 
-
 		mqrq_prev->sg = mmc_alloc_sg(host->max_segs, &ret);
-		if (ret)
+		if (ret == -ENOMEM)
+			goto prev_sg_alloc_failed;
+		else if (ret)
 			goto cleanup_queue;
+
+		goto success;
+
+prev_sg_alloc_failed:
+		kfree(mqrq_cur->sg);
+		mqrq_cur->sg = NULL;
+cur_sg_alloc_failed:
+		host->max_segs /= 2;
+		if (host->max_segs) {
+			goto retry;
+		} else {
+			host->max_segs = max_segs;
+			goto cleanup_queue;
+		}
 	}
 
+success:
 	sema_init(&mq->thread_sem, 1);
 
+	/* hook for pm qos legacy init */
+	if (card->host->ops->init)
+		card->host->ops->init(card->host);
+
 	mq->thread = kthread_run(mmc_queue_thread, mq, "mmcqd/%d%s",
 		host->index, subname ? subname : "");
 
@@ -411,28 +599,179 @@
 	mqrq_prev->packed = NULL;
 }
 
+static void mmc_cmdq_softirq_done(struct request *rq)
+{
+	struct mmc_queue *mq = rq->q->queuedata;
+
+	mq->cmdq_complete_fn(rq);
+}
+
+static void mmc_cmdq_error_work(struct work_struct *work)
+{
+	struct mmc_queue *mq = container_of(work, struct mmc_queue,
+					    cmdq_err_work);
+
+	mq->cmdq_error_fn(mq);
+}
+
+enum blk_eh_timer_return mmc_cmdq_rq_timed_out(struct request *req)
+{
+	struct mmc_queue *mq = req->q->queuedata;
+
+	pr_err("%s: request with tag: %d flags: 0x%llx timed out\n",
+	       mmc_hostname(mq->card->host), req->tag, req->cmd_flags);
+
+	return mq->cmdq_req_timed_out(req);
+}
+
+int mmc_cmdq_init(struct mmc_queue *mq, struct mmc_card *card)
+{
+	int i, ret = 0;
+	/* one slot is reserved for dcmd requests */
+	int q_depth = card->ext_csd.cmdq_depth - 1;
+
+	card->cmdq_init = false;
+	if (!(card->host->caps2 & MMC_CAP2_CMD_QUEUE)) {
+		ret = -ENOTSUPP;
+		goto out;
+	}
+
+	init_waitqueue_head(&card->host->cmdq_ctx.queue_empty_wq);
+	init_waitqueue_head(&card->host->cmdq_ctx.wait);
+
+	mq->mqrq_cmdq = kzalloc(
+			sizeof(struct mmc_queue_req) * q_depth, GFP_KERNEL);
+	if (!mq->mqrq_cmdq) {
+		pr_warn("%s: unable to allocate mqrq's for q_depth %d\n",
+			mmc_card_name(card), q_depth);
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	/* sg is allocated for data request slots only */
+	for (i = 0; i < q_depth; i++) {
+		mq->mqrq_cmdq[i].sg = mmc_alloc_sg(card->host->max_segs, &ret);
+		if (ret) {
+			pr_warn("%s: unable to allocate cmdq sg of size %d\n",
+				mmc_card_name(card),
+				card->host->max_segs);
+			goto free_mqrq_sg;
+		}
+	}
+
+	ret = blk_queue_init_tags(mq->queue, q_depth, NULL, BLK_TAG_ALLOC_FIFO);
+	if (ret) {
+		pr_warn("%s: unable to allocate cmdq tags %d\n",
+				mmc_card_name(card), q_depth);
+		goto free_mqrq_sg;
+	}
+
+	blk_queue_softirq_done(mq->queue, mmc_cmdq_softirq_done);
+	INIT_WORK(&mq->cmdq_err_work, mmc_cmdq_error_work);
+	init_completion(&mq->cmdq_pending_req_done);
+
+	blk_queue_rq_timed_out(mq->queue, mmc_cmdq_rq_timed_out);
+	blk_queue_rq_timeout(mq->queue, 120 * HZ);
+	card->cmdq_init = true;
+
+	goto out;
+
+free_mqrq_sg:
+	for (i = 0; i < q_depth; i++)
+		kfree(mq->mqrq_cmdq[i].sg);
+	kfree(mq->mqrq_cmdq);
+	mq->mqrq_cmdq = NULL;
+out:
+	return ret;
+}
+
+void mmc_cmdq_clean(struct mmc_queue *mq, struct mmc_card *card)
+{
+	int i;
+	int q_depth = card->ext_csd.cmdq_depth - 1;
+
+	blk_free_tags(mq->queue->queue_tags);
+	mq->queue->queue_tags = NULL;
+	blk_queue_free_tags(mq->queue);
+
+	for (i = 0; i < q_depth; i++)
+		kfree(mq->mqrq_cmdq[i].sg);
+	kfree(mq->mqrq_cmdq);
+	mq->mqrq_cmdq = NULL;
+}
+
 /**
  * mmc_queue_suspend - suspend a MMC request queue
  * @mq: MMC queue to suspend
+ * @wait: Wait till MMC request queue is empty
  *
  * Stop the block request queue, and wait for our thread to
  * complete any outstanding requests.  This ensures that we
  * won't suspend while a request is being processed.
  */
-void mmc_queue_suspend(struct mmc_queue *mq)
+int mmc_queue_suspend(struct mmc_queue *mq, int wait)
 {
 	struct request_queue *q = mq->queue;
 	unsigned long flags;
+	int rc = 0;
+	struct mmc_card *card = mq->card;
+	struct request *req;
 
-	if (!(mq->flags & MMC_QUEUE_SUSPENDED)) {
-		mq->flags |= MMC_QUEUE_SUSPENDED;
+	if (card->cmdq_init && blk_queue_tagged(q)) {
+		struct mmc_host *host = card->host;
 
-		spin_lock_irqsave(q->queue_lock, flags);
-		blk_stop_queue(q);
-		spin_unlock_irqrestore(q->queue_lock, flags);
+		if (test_and_set_bit(MMC_QUEUE_SUSPENDED, &mq->flags))
+			goto out;
 
-		down(&mq->thread_sem);
+		if (wait) {
+			blk_cleanup_queue(q);
+			mq->cmdq_shutdown(mq);
+		} else {
+			spin_lock_irqsave(q->queue_lock, flags);
+			blk_stop_queue(q);
+			wake_up(&host->cmdq_ctx.wait);
+			req = blk_peek_request(q);
+			if (req || mq->cmdq_req_peeked ||
+			    host->cmdq_ctx.active_reqs) {
+				clear_bit(MMC_QUEUE_SUSPENDED, &mq->flags);
+				blk_start_queue(q);
+				rc = -EBUSY;
+			}
+			spin_unlock_irqrestore(q->queue_lock, flags);
+		}
+
+		goto out;
 	}
+
+	if (!(test_and_set_bit(MMC_QUEUE_SUSPENDED, &mq->flags))) {
+		if (!wait) {
+			/* suspend/stop the queue in case of suspend */
+			spin_lock_irqsave(q->queue_lock, flags);
+			blk_stop_queue(q);
+			spin_unlock_irqrestore(q->queue_lock, flags);
+		} else {
+			/* shutdown the queue in case of shutdown/reboot */
+			blk_cleanup_queue(q);
+		}
+
+		rc = down_trylock(&mq->thread_sem);
+		if (rc && !wait) {
+			/*
+			 * Failed to take the lock so better to abort the
+			 * suspend because mmcqd thread is processing requests.
+			 */
+			clear_bit(MMC_QUEUE_SUSPENDED, &mq->flags);
+			spin_lock_irqsave(q->queue_lock, flags);
+			blk_start_queue(q);
+			spin_unlock_irqrestore(q->queue_lock, flags);
+			rc = -EBUSY;
+		} else if (rc && wait) {
+			down(&mq->thread_sem);
+			rc = 0;
+		}
+	}
+out:
+	return rc;
 }
 
 /**
@@ -442,12 +781,13 @@
 void mmc_queue_resume(struct mmc_queue *mq)
 {
 	struct request_queue *q = mq->queue;
+	struct mmc_card *card = mq->card;
 	unsigned long flags;
 
-	if (mq->flags & MMC_QUEUE_SUSPENDED) {
-		mq->flags &= ~MMC_QUEUE_SUSPENDED;
+	if (test_and_clear_bit(MMC_QUEUE_SUSPENDED, &mq->flags)) {
 
-		up(&mq->thread_sem);
+		if (!(card->cmdq_init && blk_queue_tagged(q)))
+			up(&mq->thread_sem);
 
 		spin_lock_irqsave(q->queue_lock, flags);
 		blk_start_queue(q);
diff --git a/drivers/mmc/card/queue.h b/drivers/mmc/card/queue.h
index fe58d31..02917aa 100644
--- a/drivers/mmc/card/queue.h
+++ b/drivers/mmc/card/queue.h
@@ -48,20 +48,39 @@
 	struct mmc_async_req	mmc_active;
 	enum mmc_packed_type	cmd_type;
 	struct mmc_packed	*packed;
+	struct mmc_cmdq_req	cmdq_req;
 };
 
 struct mmc_queue {
 	struct mmc_card		*card;
 	struct task_struct	*thread;
 	struct semaphore	thread_sem;
-	unsigned int		flags;
-#define MMC_QUEUE_SUSPENDED	(1 << 0)
-#define MMC_QUEUE_NEW_REQUEST	(1 << 1)
+	unsigned long		flags;
+#define MMC_QUEUE_SUSPENDED		0
+#define MMC_QUEUE_NEW_REQUEST		1
+	int (*issue_fn)(struct mmc_queue *, struct request *);
+	int (*cmdq_issue_fn)(struct mmc_queue *,
+			     struct request *);
+	void (*cmdq_complete_fn)(struct request *);
+	void (*cmdq_error_fn)(struct mmc_queue *);
+	enum blk_eh_timer_return (*cmdq_req_timed_out)(struct request *);
 	void			*data;
 	struct request_queue	*queue;
 	struct mmc_queue_req	mqrq[2];
 	struct mmc_queue_req	*mqrq_cur;
 	struct mmc_queue_req	*mqrq_prev;
+	struct mmc_queue_req	*mqrq_cmdq;
+	bool			wr_packing_enabled;
+	int			num_of_potential_packed_wr_reqs;
+	int			num_wr_reqs_to_start_packing;
+	bool			no_pack_for_random;
+	struct work_struct	cmdq_err_work;
+
+	struct completion	cmdq_pending_req_done;
+	struct request		*cmdq_req_peeked;
+	int (*err_check_fn)(struct mmc_card *, struct mmc_async_req *);
+	void (*packed_test_fn)(struct request_queue *, struct mmc_queue_req *);
+	void (*cmdq_shutdown)(struct mmc_queue *);
 #ifdef CONFIG_MMC_SIMULATE_MAX_SPEED
 	atomic_t max_write_speed;
 	atomic_t max_read_speed;
@@ -73,9 +92,9 @@
 };
 
 extern int mmc_init_queue(struct mmc_queue *, struct mmc_card *, spinlock_t *,
-			  const char *);
+			  const char *, int);
 extern void mmc_cleanup_queue(struct mmc_queue *);
-extern void mmc_queue_suspend(struct mmc_queue *);
+extern int mmc_queue_suspend(struct mmc_queue *, int);
 extern void mmc_queue_resume(struct mmc_queue *);
 
 extern unsigned int mmc_queue_map_sg(struct mmc_queue *,
@@ -88,4 +107,9 @@
 
 extern int mmc_access_rpmb(struct mmc_queue *);
 
+extern void print_mmc_packing_stats(struct mmc_card *card);
+
+extern int mmc_cmdq_init(struct mmc_queue *mq, struct mmc_card *card);
+extern void mmc_cmdq_clean(struct mmc_queue *mq, struct mmc_card *card);
+
 #endif
diff --git a/drivers/mmc/core/Kconfig b/drivers/mmc/core/Kconfig
index 36d9d69..c0592ac 100644
--- a/drivers/mmc/core/Kconfig
+++ b/drivers/mmc/core/Kconfig
@@ -23,6 +23,17 @@
 	  This driver can also be built as a module. If so, the module
 	  will be called pwrseq_simple.
 
+config MMC_RING_BUFFER
+	bool "MMC_RING_BUFFER"
+	depends on MMC
+	default n
+	help
+	  This enables the ring buffer tracing of significant
+	  events for mmc driver to provide command history for
+	  debugging purpose.
+
+	  If unsure, say N.
+
 config MMC_EMBEDDED_SDIO
 	boolean "MMC embedded SDIO device support (EXPERIMENTAL)"
 	help
diff --git a/drivers/mmc/core/Makefile b/drivers/mmc/core/Makefile
index f007151..dbd1f1b 100644
--- a/drivers/mmc/core/Makefile
+++ b/drivers/mmc/core/Makefile
@@ -12,3 +12,4 @@
 obj-$(CONFIG_PWRSEQ_SIMPLE)	+= pwrseq_simple.o
 obj-$(CONFIG_PWRSEQ_EMMC)	+= pwrseq_emmc.o
 mmc_core-$(CONFIG_DEBUG_FS)	+= debugfs.o
+obj-$(CONFIG_MMC_RING_BUFFER)	+= ring_buffer.o
diff --git a/drivers/mmc/core/bus.c b/drivers/mmc/core/bus.c
index c64266f..1c28cf8 100644
--- a/drivers/mmc/core/bus.c
+++ b/drivers/mmc/core/bus.c
@@ -132,6 +132,16 @@
 	struct mmc_host *host = card->host;
 	int ret;
 
+	if (!drv) {
+		pr_debug("%s: %s: drv is NULL\n", dev_name(dev), __func__);
+		return;
+	}
+
+	if (!card) {
+		pr_debug("%s: %s: card is NULL\n", dev_name(dev), __func__);
+		return;
+	}
+
 	if (dev->driver && drv->shutdown)
 		drv->shutdown(card);
 
@@ -154,6 +164,8 @@
 	if (ret)
 		return ret;
 
+	if (mmc_bus_needs_resume(host))
+		return 0;
 	ret = host->bus_ops->suspend(host);
 	return ret;
 }
@@ -164,11 +176,17 @@
 	struct mmc_host *host = card->host;
 	int ret;
 
+	if (mmc_bus_manual_resume(host)) {
+		host->bus_resume_flags |= MMC_BUSRESUME_NEEDS_RESUME;
+		goto skip_full_resume;
+	}
+
 	ret = host->bus_ops->resume(host);
 	if (ret)
 		pr_warn("%s: error %d during resume (card was removed?)\n",
 			mmc_hostname(host), ret);
 
+skip_full_resume:
 	ret = pm_generic_resume(dev);
 	return ret;
 }
@@ -180,6 +198,9 @@
 	struct mmc_card *card = mmc_dev_to_card(dev);
 	struct mmc_host *host = card->host;
 
+	if (mmc_bus_needs_resume(host))
+		return 0;
+
 	return host->bus_ops->runtime_suspend(host);
 }
 
@@ -188,8 +209,12 @@
 	struct mmc_card *card = mmc_dev_to_card(dev);
 	struct mmc_host *host = card->host;
 
+	if (mmc_bus_needs_resume(host))
+		host->bus_resume_flags &= ~MMC_BUSRESUME_NEEDS_RESUME;
+
 	return host->bus_ops->runtime_resume(host);
 }
+
 #endif /* !CONFIG_PM */
 
 static const struct dev_pm_ops mmc_bus_pm_ops = {
@@ -273,6 +298,9 @@
 	card->dev.release = mmc_release_card;
 	card->dev.type = type;
 
+	spin_lock_init(&card->wr_pack_stats.lock);
+	spin_lock_init(&card->bkops.stats.lock);
+
 	return card;
 }
 
@@ -350,6 +378,13 @@
 
 	card->dev.of_node = mmc_of_find_child_device(card->host, 0);
 
+	if (mmc_card_sdio(card)) {
+		ret = device_init_wakeup(&card->dev, true);
+		if (ret)
+			pr_err("%s: %s: failed to init wakeup: %d\n",
+			       mmc_hostname(card->host), __func__, ret);
+	}
+
 	device_enable_async_suspend(&card->dev);
 
 	ret = device_add(&card->dev);
@@ -383,6 +418,9 @@
 		of_node_put(card->dev.of_node);
 	}
 
+	kfree(card->wr_pack_stats.packing_events);
+	kfree(card->cached_ext_csd);
+
 	put_device(&card->dev);
 }
 
diff --git a/drivers/mmc/core/core.c b/drivers/mmc/core/core.c
index 9551238..c3c888e 100644
--- a/drivers/mmc/core/core.c
+++ b/drivers/mmc/core/core.c
@@ -14,6 +14,7 @@
 #include <linux/init.h>
 #include <linux/interrupt.h>
 #include <linux/completion.h>
+#include <linux/devfreq.h>
 #include <linux/device.h>
 #include <linux/delay.h>
 #include <linux/pagemap.h>
@@ -29,6 +30,8 @@
 #include <linux/random.h>
 #include <linux/slab.h>
 #include <linux/of.h>
+#include <linux/pm.h>
+#include <linux/jiffies.h>
 
 #include <linux/mmc/card.h>
 #include <linux/mmc/host.h>
@@ -109,6 +112,7 @@
 
 	data->error = data_errors[prandom_u32() % ARRAY_SIZE(data_errors)];
 	data->bytes_xfered = (prandom_u32() % (data->bytes_xfered >> 9)) << 9;
+	data->fault_injected = true;
 }
 
 #else /* CONFIG_FAIL_MMC_REQUEST */
@@ -120,6 +124,743 @@
 
 #endif /* CONFIG_FAIL_MMC_REQUEST */
 
+static bool mmc_is_data_request(struct mmc_request *mmc_request)
+{
+	switch (mmc_request->cmd->opcode) {
+	case MMC_READ_SINGLE_BLOCK:
+	case MMC_READ_MULTIPLE_BLOCK:
+	case MMC_WRITE_BLOCK:
+	case MMC_WRITE_MULTIPLE_BLOCK:
+		return true;
+	default:
+		return false;
+	}
+}
+
+static void mmc_clk_scaling_start_busy(struct mmc_host *host, bool lock_needed)
+{
+	struct mmc_devfeq_clk_scaling *clk_scaling = &host->clk_scaling;
+
+	if (!clk_scaling->enable)
+		return;
+
+	if (lock_needed)
+		spin_lock_bh(&clk_scaling->lock);
+
+	clk_scaling->start_busy = ktime_get();
+	clk_scaling->is_busy_started = true;
+
+	if (lock_needed)
+		spin_unlock_bh(&clk_scaling->lock);
+}
+
+static void mmc_clk_scaling_stop_busy(struct mmc_host *host, bool lock_needed)
+{
+	struct mmc_devfeq_clk_scaling *clk_scaling = &host->clk_scaling;
+
+	if (!clk_scaling->enable)
+		return;
+
+	if (lock_needed)
+		spin_lock_bh(&clk_scaling->lock);
+
+	if (!clk_scaling->is_busy_started) {
+		WARN_ON(1);
+		goto out;
+	}
+
+	clk_scaling->total_busy_time_us +=
+		ktime_to_us(ktime_sub(ktime_get(),
+			clk_scaling->start_busy));
+	pr_debug("%s: accumulated busy time is %lu usec\n",
+		mmc_hostname(host), clk_scaling->total_busy_time_us);
+	clk_scaling->is_busy_started = false;
+
+out:
+	if (lock_needed)
+		spin_unlock_bh(&clk_scaling->lock);
+}
+
+/**
+ * mmc_cmdq_clk_scaling_start_busy() - start busy timer for data requests
+ * @host: pointer to mmc host structure
+ * @lock_needed: flag indication if locking is needed
+ *
+ * This function starts the busy timer in case it was not already started.
+ */
+void mmc_cmdq_clk_scaling_start_busy(struct mmc_host *host,
+	bool lock_needed)
+{
+	if (!host->clk_scaling.enable)
+		return;
+
+	if (lock_needed)
+		spin_lock_bh(&host->clk_scaling.lock);
+
+	if (!host->clk_scaling.is_busy_started &&
+		!test_bit(CMDQ_STATE_DCMD_ACTIVE,
+			&host->cmdq_ctx.curr_state)) {
+		host->clk_scaling.start_busy = ktime_get();
+		host->clk_scaling.is_busy_started = true;
+	}
+
+	if (lock_needed)
+		spin_unlock_bh(&host->clk_scaling.lock);
+}
+EXPORT_SYMBOL(mmc_cmdq_clk_scaling_start_busy);
+
+/**
+ * mmc_cmdq_clk_scaling_stop_busy() - stop busy timer for last data requests
+ * @host: pointer to mmc host structure
+ * @lock_needed: flag indication if locking is needed
+ *
+ * This function stops the busy timer in case it is the last data request.
+ * In case the current request is not the last one, the busy time till
+ * now will be accumulated and the counter will be restarted.
+ */
+void mmc_cmdq_clk_scaling_stop_busy(struct mmc_host *host,
+	bool lock_needed, bool is_cmdq_dcmd)
+{
+	if (!host->clk_scaling.enable)
+		return;
+
+	if (lock_needed)
+		spin_lock_bh(&host->clk_scaling.lock);
+
+	/*
+	 *  For CQ mode: In completion of DCMD request, start busy time in
+	 *  case of pending data requests
+	 */
+	if (is_cmdq_dcmd) {
+		if (host->cmdq_ctx.data_active_reqs) {
+			host->clk_scaling.is_busy_started = true;
+			host->clk_scaling.start_busy = ktime_get();
+		}
+		goto out;
+	}
+
+	host->clk_scaling.total_busy_time_us +=
+		ktime_to_us(ktime_sub(ktime_get(),
+			host->clk_scaling.start_busy));
+
+	if (host->cmdq_ctx.data_active_reqs) {
+		host->clk_scaling.is_busy_started = true;
+		host->clk_scaling.start_busy = ktime_get();
+	} else {
+		host->clk_scaling.is_busy_started = false;
+	}
+out:
+	if (lock_needed)
+		spin_unlock_bh(&host->clk_scaling.lock);
+
+}
+EXPORT_SYMBOL(mmc_cmdq_clk_scaling_stop_busy);
+
+/**
+ * mmc_can_scale_clk() - Check clock scaling capability
+ * @host: pointer to mmc host structure
+ */
+bool mmc_can_scale_clk(struct mmc_host *host)
+{
+	if (!host) {
+		pr_err("bad host parameter\n");
+		WARN_ON(1);
+		return false;
+	}
+
+	return host->caps2 & MMC_CAP2_CLK_SCALE;
+}
+EXPORT_SYMBOL(mmc_can_scale_clk);
+
+static int mmc_devfreq_get_dev_status(struct device *dev,
+		struct devfreq_dev_status *status)
+{
+	struct mmc_host *host = container_of(dev, struct mmc_host, class_dev);
+	struct mmc_devfeq_clk_scaling *clk_scaling;
+
+	if (!host) {
+		pr_err("bad host parameter\n");
+		WARN_ON(1);
+		return -EINVAL;
+	}
+
+	clk_scaling = &host->clk_scaling;
+
+	if (!clk_scaling->enable)
+		return 0;
+
+	spin_lock_bh(&clk_scaling->lock);
+
+	/* accumulate the busy time of ongoing work */
+	memset(status, 0, sizeof(*status));
+	if (clk_scaling->is_busy_started) {
+		if (mmc_card_cmdq(host->card)) {
+			/* the "busy-timer" will be restarted in case there
+			 * are pending data requests */
+			mmc_cmdq_clk_scaling_stop_busy(host, false, false);
+		} else {
+			mmc_clk_scaling_stop_busy(host, false);
+			mmc_clk_scaling_start_busy(host, false);
+		}
+	}
+
+	status->busy_time = clk_scaling->total_busy_time_us;
+	status->total_time = ktime_to_us(ktime_sub(ktime_get(),
+		clk_scaling->measure_interval_start));
+	clk_scaling->total_busy_time_us = 0;
+	status->current_frequency = clk_scaling->curr_freq;
+	clk_scaling->measure_interval_start = ktime_get();
+
+	pr_debug("%s: status: load = %lu%% - total_time=%lu busy_time = %lu, clk=%lu\n",
+		mmc_hostname(host),
+		(status->busy_time*100)/status->total_time,
+		status->total_time, status->busy_time,
+		status->current_frequency);
+
+	spin_unlock_bh(&clk_scaling->lock);
+
+	return 0;
+}
+
+static bool mmc_is_valid_state_for_clk_scaling(struct mmc_host *host)
+{
+	struct mmc_card *card = host->card;
+	u32 status;
+
+	/*
+	 * If the current partition type is RPMB, clock switching may not
+	 * work properly as sending tuning command (CMD21) is illegal in
+	 * this mode.
+	 */
+	if (!card || (mmc_card_mmc(card) &&
+			(card->part_curr == EXT_CSD_PART_CONFIG_ACC_RPMB ||
+			mmc_card_doing_bkops(card))))
+		return false;
+
+	if (mmc_send_status(card, &status)) {
+		pr_err("%s: Get card status fail\n", mmc_hostname(card->host));
+		return false;
+	}
+
+	return R1_CURRENT_STATE(status) == R1_STATE_TRAN;
+}
+
+int mmc_cmdq_halt_on_empty_queue(struct mmc_host *host)
+{
+	int err = 0;
+
+	err = wait_event_interruptible(host->cmdq_ctx.queue_empty_wq,
+				(!host->cmdq_ctx.active_reqs));
+	if (host->cmdq_ctx.active_reqs) {
+		pr_err("%s: %s: unexpected active requests (%lu)\n",
+			mmc_hostname(host), __func__,
+			host->cmdq_ctx.active_reqs);
+		return -EPERM;
+	}
+
+	err = mmc_cmdq_halt(host, true);
+	if (err) {
+		pr_err("%s: %s: mmc_cmdq_halt failed (%d)\n",
+		       mmc_hostname(host), __func__, err);
+		goto out;
+	}
+
+out:
+	return err;
+}
+EXPORT_SYMBOL(mmc_cmdq_halt_on_empty_queue);
+
+int mmc_clk_update_freq(struct mmc_host *host,
+		unsigned long freq, enum mmc_load state)
+{
+	int err = 0;
+	bool cmdq_mode;
+
+	if (!host) {
+		pr_err("bad host parameter\n");
+		WARN_ON(1);
+		return -EINVAL;
+	}
+
+	mmc_host_clk_hold(host);
+	cmdq_mode = mmc_card_cmdq(host->card);
+
+	/* make sure the card supports the frequency we want */
+	if (unlikely(freq > host->card->clk_scaling_highest)) {
+		freq = host->card->clk_scaling_highest;
+		pr_warn("%s: %s: frequency was overridden to %lu\n",
+				mmc_hostname(host), __func__,
+				host->card->clk_scaling_highest);
+	}
+
+	if (unlikely(freq < host->card->clk_scaling_lowest)) {
+		freq = host->card->clk_scaling_lowest;
+		pr_warn("%s: %s: frequency was overridden to %lu\n",
+			mmc_hostname(host), __func__,
+			host->card->clk_scaling_lowest);
+	}
+
+	if (freq == host->clk_scaling.curr_freq)
+		goto out;
+
+	if (host->ops->notify_load) {
+		err = host->ops->notify_load(host, state);
+		if (err) {
+			pr_err("%s: %s: fail on notify_load\n",
+				mmc_hostname(host), __func__);
+			goto out;
+		}
+	}
+
+	if (cmdq_mode) {
+		err = mmc_cmdq_halt_on_empty_queue(host);
+		if (err) {
+			pr_err("%s: %s: failed halting queue (%d)\n",
+				mmc_hostname(host), __func__, err);
+			goto halt_failed;
+		}
+	}
+
+	if (!mmc_is_valid_state_for_clk_scaling(host)) {
+		pr_debug("%s: invalid state for clock scaling - skipping",
+			mmc_hostname(host));
+		goto invalid_state;
+	}
+
+	err = host->bus_ops->change_bus_speed(host, &freq);
+	if (!err)
+		host->clk_scaling.curr_freq = freq;
+	else
+		pr_err("%s: %s: failed (%d) at freq=%lu\n",
+			mmc_hostname(host), __func__, err, freq);
+
+invalid_state:
+	if (cmdq_mode) {
+		if (mmc_cmdq_halt(host, false))
+			pr_err("%s: %s: cmdq unhalt failed\n",
+			mmc_hostname(host), __func__);
+	}
+
+halt_failed:
+	if (err) {
+		/* restore previous state */
+		if (host->ops->notify_load)
+			if (host->ops->notify_load(host,
+				host->clk_scaling.state))
+				pr_err("%s: %s: fail on notify_load restore\n",
+					mmc_hostname(host), __func__);
+	}
+out:
+	mmc_host_clk_release(host);
+	return err;
+}
+EXPORT_SYMBOL(mmc_clk_update_freq);
+
+static int mmc_devfreq_set_target(struct device *dev,
+				unsigned long *freq, u32 devfreq_flags)
+{
+	struct mmc_host *host = container_of(dev, struct mmc_host, class_dev);
+	struct mmc_devfeq_clk_scaling *clk_scaling;
+	int err = 0;
+	int abort;
+
+	if (!(host && freq)) {
+		pr_err("%s: unexpected host/freq parameter\n", __func__);
+		err = -EINVAL;
+		goto out;
+	}
+	clk_scaling = &host->clk_scaling;
+
+	if (!clk_scaling->enable)
+		goto out;
+
+	pr_debug("%s: target freq = %lu (%s)\n", mmc_hostname(host),
+		*freq, current->comm);
+
+	if ((clk_scaling->curr_freq == *freq) ||
+		clk_scaling->skip_clk_scale_freq_update)
+		goto out;
+
+	/* No need to scale the clocks if they are gated */
+	if (!host->ios.clock)
+		goto out;
+
+	spin_lock_bh(&clk_scaling->lock);
+	if (clk_scaling->clk_scaling_in_progress) {
+		pr_debug("%s: clocks scaling is already in-progress by mmc thread\n",
+			mmc_hostname(host));
+		spin_unlock_bh(&clk_scaling->lock);
+		goto out;
+	}
+	clk_scaling->need_freq_change = true;
+	clk_scaling->target_freq = *freq;
+	clk_scaling->state = *freq < clk_scaling->curr_freq ?
+		MMC_LOAD_LOW : MMC_LOAD_HIGH;
+	spin_unlock_bh(&clk_scaling->lock);
+
+	abort = __mmc_claim_host(host, &clk_scaling->devfreq_abort);
+	if (abort)
+		goto out;
+
+	/*
+	 * In case we were able to claim host there is no need to
+	 * defer the frequency change. It will be done now
+	 */
+	clk_scaling->need_freq_change = false;
+
+	mmc_host_clk_hold(host);
+	err = mmc_clk_update_freq(host, *freq, clk_scaling->state);
+	if (err && err != -EAGAIN)
+		pr_err("%s: clock scale to %lu failed with error %d\n",
+			mmc_hostname(host), *freq, err);
+	else
+		pr_debug("%s: clock change to %lu finished successfully (%s)\n",
+			mmc_hostname(host), *freq, current->comm);
+
+
+	mmc_host_clk_release(host);
+	mmc_release_host(host);
+out:
+	return err;
+}
+
+/**
+ * mmc_deferred_scaling() - scale clocks from data path (mmc thread context)
+ * @host: pointer to mmc host structure
+ *
+ * This function does clock scaling in case "need_freq_change" flag was set
+ * by the clock scaling logic.
+ */
+void mmc_deferred_scaling(struct mmc_host *host)
+{
+	unsigned long target_freq;
+	int err;
+
+	if (!host->clk_scaling.enable)
+		return;
+
+	spin_lock_bh(&host->clk_scaling.lock);
+
+	if (host->clk_scaling.clk_scaling_in_progress ||
+		!(host->clk_scaling.need_freq_change)) {
+		spin_unlock_bh(&host->clk_scaling.lock);
+		return;
+	}
+
+
+	atomic_inc(&host->clk_scaling.devfreq_abort);
+	target_freq = host->clk_scaling.target_freq;
+	host->clk_scaling.clk_scaling_in_progress = true;
+	host->clk_scaling.need_freq_change = false;
+	spin_unlock_bh(&host->clk_scaling.lock);
+	pr_debug("%s: doing deferred frequency change (%lu) (%s)\n",
+				mmc_hostname(host),
+				target_freq, current->comm);
+
+	err = mmc_clk_update_freq(host, target_freq,
+		host->clk_scaling.state);
+	if (err && err != -EAGAIN)
+		pr_err("%s: failed on deferred scale clocks (%d)\n",
+			mmc_hostname(host), err);
+	else
+		pr_debug("%s: clocks were successfully scaled to %lu (%s)\n",
+			mmc_hostname(host),
+			target_freq, current->comm);
+	host->clk_scaling.clk_scaling_in_progress = false;
+	atomic_dec(&host->clk_scaling.devfreq_abort);
+}
+EXPORT_SYMBOL(mmc_deferred_scaling);
+
+static int mmc_devfreq_create_freq_table(struct mmc_host *host)
+{
+	int i;
+	struct mmc_devfeq_clk_scaling *clk_scaling = &host->clk_scaling;
+
+	pr_debug("%s: supported: lowest=%lu, highest=%lu\n",
+		mmc_hostname(host),
+		host->card->clk_scaling_lowest,
+		host->card->clk_scaling_highest);
+
+	if (!clk_scaling->freq_table) {
+		pr_debug("%s: no frequency table defined -  setting default\n",
+			mmc_hostname(host));
+		clk_scaling->freq_table = kzalloc(
+			2*sizeof(*(clk_scaling->freq_table)), GFP_KERNEL);
+		if (!clk_scaling->freq_table)
+			return -ENOMEM;
+		clk_scaling->freq_table[0] = host->card->clk_scaling_lowest;
+		clk_scaling->freq_table[1] = host->card->clk_scaling_highest;
+		clk_scaling->freq_table_sz = 2;
+		goto out;
+	}
+
+	if (host->card->clk_scaling_lowest >
+		clk_scaling->freq_table[0])
+		pr_debug("%s: frequency table undershot possible freq\n",
+			mmc_hostname(host));
+
+	for (i = 0; i < clk_scaling->freq_table_sz; i++) {
+		if (clk_scaling->freq_table[i] <=
+			host->card->clk_scaling_highest)
+			continue;
+		clk_scaling->freq_table[i] =
+			host->card->clk_scaling_highest;
+		clk_scaling->freq_table_sz = i + 1;
+		pr_debug("%s: frequency table overshot possible freq (%d)\n",
+				mmc_hostname(host), clk_scaling->freq_table[i]);
+		break;
+	}
+
+out:
+	clk_scaling->devfreq_profile.freq_table = (unsigned long *)clk_scaling->freq_table;
+	clk_scaling->devfreq_profile.max_state = clk_scaling->freq_table_sz;
+
+	for (i = 0; i < clk_scaling->freq_table_sz; i++)
+		pr_debug("%s: freq[%d] = %u\n",
+			mmc_hostname(host), i, clk_scaling->freq_table[i]);
+
+	return 0;
+}
+
+/**
+ * mmc_init_devfreq_clk_scaling() - Initialize clock scaling
+ * @host: pointer to mmc host structure
+ *
+ * Initialize clock scaling for supported hosts. It is assumed that the caller
+ * ensure clock is running at maximum possible frequency before calling this
+ * function. Shall use struct devfreq_simple_ondemand_data to configure
+ * governor.
+ */
+int mmc_init_clk_scaling(struct mmc_host *host)
+{
+	int err;
+
+	if (!host || !host->card) {
+		pr_err("%s: unexpected host/card parameters\n",
+			__func__);
+		return -EINVAL;
+	}
+
+	if (!mmc_can_scale_clk(host) ||
+		!host->bus_ops->change_bus_speed) {
+		pr_debug("%s: clock scaling is not supported\n",
+			mmc_hostname(host));
+		return 0;
+	}
+
+	pr_debug("registering %s dev (%p) to devfreq",
+		mmc_hostname(host),
+		mmc_classdev(host));
+
+	if (host->clk_scaling.devfreq) {
+		pr_err("%s: dev is already registered for dev %p\n",
+			mmc_hostname(host),
+			mmc_dev(host));
+		return -EPERM;
+	}
+	spin_lock_init(&host->clk_scaling.lock);
+	atomic_set(&host->clk_scaling.devfreq_abort, 0);
+	host->clk_scaling.curr_freq = host->ios.clock;
+	host->clk_scaling.clk_scaling_in_progress = false;
+	host->clk_scaling.need_freq_change = false;
+	host->clk_scaling.is_busy_started = false;
+
+	host->clk_scaling.devfreq_profile.polling_ms =
+		host->clk_scaling.polling_delay_ms;
+	host->clk_scaling.devfreq_profile.get_dev_status =
+		mmc_devfreq_get_dev_status;
+	host->clk_scaling.devfreq_profile.target = mmc_devfreq_set_target;
+	host->clk_scaling.devfreq_profile.initial_freq = host->ios.clock;
+
+	host->clk_scaling.ondemand_gov_data.simple_scaling = true;
+	host->clk_scaling.ondemand_gov_data.upthreshold =
+		host->clk_scaling.upthreshold;
+	host->clk_scaling.ondemand_gov_data.downdifferential =
+		host->clk_scaling.upthreshold - host->clk_scaling.downthreshold;
+
+	err = mmc_devfreq_create_freq_table(host);
+	if (err) {
+		pr_err("%s: fail to create devfreq frequency table\n",
+			mmc_hostname(host));
+		return err;
+	}
+
+	pr_debug("%s: adding devfreq with: upthreshold=%u downthreshold=%u polling=%u\n",
+		mmc_hostname(host),
+		host->clk_scaling.ondemand_gov_data.upthreshold,
+		host->clk_scaling.ondemand_gov_data.downdifferential,
+		host->clk_scaling.devfreq_profile.polling_ms);
+	host->clk_scaling.devfreq = devfreq_add_device(
+		mmc_classdev(host),
+		&host->clk_scaling.devfreq_profile,
+		"simple_ondemand",
+		&host->clk_scaling.ondemand_gov_data);
+	if (!host->clk_scaling.devfreq) {
+		pr_err("%s: unable to register with devfreq\n",
+			mmc_hostname(host));
+		return -EPERM;
+	}
+
+	pr_debug("%s: clk scaling is enabled for device %s (%p) with devfreq %p (clock = %uHz)\n",
+		mmc_hostname(host),
+		dev_name(mmc_classdev(host)),
+		mmc_classdev(host),
+		host->clk_scaling.devfreq,
+		host->ios.clock);
+
+	host->clk_scaling.enable = true;
+
+	return err;
+}
+EXPORT_SYMBOL(mmc_init_clk_scaling);
+
+/**
+ * mmc_suspend_clk_scaling() - suspend clock scaling
+ * @host: pointer to mmc host structure
+ *
+ * This API will suspend devfreq feature for the specific host.
+ * The statistics collected by mmc will be cleared.
+ * This function is intended to be called by the pm callbacks
+ * (e.g. runtime_suspend, suspend) of the mmc device
+ */
+int mmc_suspend_clk_scaling(struct mmc_host *host)
+{
+	int err;
+
+	if (!host) {
+		WARN(1, "bad host parameter\n");
+		return -EINVAL;
+	}
+
+	if (!mmc_can_scale_clk(host) || !host->clk_scaling.enable)
+		return 0;
+
+	if (!host->clk_scaling.devfreq) {
+		pr_err("%s: %s: no devfreq is assosiated with this device\n",
+			mmc_hostname(host), __func__);
+		return -EPERM;
+	}
+
+	atomic_inc(&host->clk_scaling.devfreq_abort);
+	wake_up(&host->wq);
+	err = devfreq_suspend_device(host->clk_scaling.devfreq);
+	if (err) {
+		pr_err("%s: %s: failed to suspend devfreq\n",
+			mmc_hostname(host), __func__);
+		return err;
+	}
+	host->clk_scaling.enable = false;
+
+	host->clk_scaling.total_busy_time_us = 0;
+
+	pr_debug("%s: devfreq was removed\n", mmc_hostname(host));
+
+	return 0;
+}
+EXPORT_SYMBOL(mmc_suspend_clk_scaling);
+
+/**
+ * mmc_resume_clk_scaling() - resume clock scaling
+ * @host: pointer to mmc host structure
+ *
+ * This API will resume devfreq feature for the specific host.
+ * This API is intended to be called by the pm callbacks
+ * (e.g. runtime_suspend, suspend) of the mmc device
+ */
+int mmc_resume_clk_scaling(struct mmc_host *host)
+{
+	int err = 0;
+	u32 max_clk_idx = 0;
+	u32 devfreq_max_clk = 0;
+	u32 devfreq_min_clk = 0;
+
+	if (!host) {
+		WARN(1, "bad host parameter\n");
+		return -EINVAL;
+	}
+
+	if (!mmc_can_scale_clk(host))
+		return 0;
+
+	if (!host->clk_scaling.devfreq) {
+		pr_err("%s: %s: no devfreq is assosiated with this device\n",
+			mmc_hostname(host), __func__);
+		return -EPERM;
+	}
+
+	atomic_set(&host->clk_scaling.devfreq_abort, 0);
+
+	max_clk_idx = host->clk_scaling.freq_table_sz - 1;
+	devfreq_max_clk = host->clk_scaling.freq_table[max_clk_idx];
+	devfreq_min_clk = host->clk_scaling.freq_table[0];
+
+	host->clk_scaling.curr_freq = devfreq_max_clk;
+	if (host->ios.clock < host->card->clk_scaling_highest)
+		host->clk_scaling.curr_freq = devfreq_min_clk;
+
+	host->clk_scaling.clk_scaling_in_progress = false;
+	host->clk_scaling.need_freq_change = false;
+
+	err = devfreq_resume_device(host->clk_scaling.devfreq);
+	if (err) {
+		pr_err("%s: %s: failed to resume devfreq (%d)\n",
+			mmc_hostname(host), __func__, err);
+	} else {
+		host->clk_scaling.enable = true;
+		pr_debug("%s: devfreq resumed\n", mmc_hostname(host));
+	}
+
+	return err;
+}
+EXPORT_SYMBOL(mmc_resume_clk_scaling);
+
+/**
+ * mmc_exit_devfreq_clk_scaling() - Disable clock scaling
+ * @host: pointer to mmc host structure
+ *
+ * Disable clock scaling permanently.
+ */
+int mmc_exit_clk_scaling(struct mmc_host *host)
+{
+	int err;
+
+	if (!host) {
+		pr_err("%s: bad host parameter\n", __func__);
+		WARN_ON(1);
+		return -EINVAL;
+	}
+
+	if (!mmc_can_scale_clk(host))
+		return 0;
+
+	if (!host->clk_scaling.devfreq) {
+		pr_err("%s: %s: no devfreq is assosiated with this device\n",
+			mmc_hostname(host), __func__);
+		return -EPERM;
+	}
+
+	err = mmc_suspend_clk_scaling(host);
+	if (err) {
+		pr_err("%s: %s: fail to suspend clock scaling (%d)\n",
+			mmc_hostname(host), __func__,  err);
+		return err;
+	}
+
+	err = devfreq_remove_device(host->clk_scaling.devfreq);
+	if (err) {
+		pr_err("%s: remove devfreq failed (%d)\n",
+			mmc_hostname(host), err);
+		return err;
+	}
+
+	host->clk_scaling.devfreq = NULL;
+	atomic_set(&host->clk_scaling.devfreq_abort, 1);
+	pr_debug("%s: devfreq was removed\n", mmc_hostname(host));
+
+	return 0;
+}
+EXPORT_SYMBOL(mmc_exit_clk_scaling);
+
 static inline void mmc_complete_cmd(struct mmc_request *mrq)
 {
 	if (mrq->cap_cmd_during_tfr && !completion_done(&mrq->cmd_completion))
@@ -150,6 +891,12 @@
 {
 	struct mmc_command *cmd = mrq->cmd;
 	int err = cmd->error;
+#ifdef CONFIG_MMC_PERF_PROFILING
+	ktime_t diff;
+#endif
+
+	if (host->clk_scaling.is_busy_started)
+		mmc_clk_scaling_stop_busy(host, true);
 
 	/* Flag re-tuning needed on CRC errors */
 	if ((cmd->opcode != MMC_SEND_TUNING_BLOCK &&
@@ -198,6 +945,24 @@
 			cmd->resp[2], cmd->resp[3]);
 
 		if (mrq->data) {
+#ifdef CONFIG_MMC_PERF_PROFILING
+			if (host->perf_enable) {
+				diff = ktime_sub(ktime_get(), host->perf.start);
+				if (mrq->data->flags == MMC_DATA_READ) {
+					host->perf.rbytes_drv +=
+							mrq->data->bytes_xfered;
+					host->perf.rtime_drv =
+						ktime_add(host->perf.rtime_drv,
+							diff);
+				} else {
+					host->perf.wbytes_drv +=
+						mrq->data->bytes_xfered;
+					host->perf.wtime_drv =
+						ktime_add(host->perf.wtime_drv,
+							diff);
+				}
+			}
+#endif
 			pr_debug("%s:     %d bytes transferred: %d\n",
 				mmc_hostname(host),
 				mrq->data->bytes_xfered, mrq->data->error);
@@ -341,33 +1106,194 @@
 			mrq->stop->error = 0;
 			mrq->stop->mrq = mrq;
 		}
+#ifdef CONFIG_MMC_PERF_PROFILING
+		if (host->perf_enable)
+			host->perf.start = ktime_get();
+#endif
 	}
 	mmc_host_clk_hold(host);
 	led_trigger_event(host->led, LED_FULL);
+
+	if (mmc_is_data_request(mrq)) {
+		mmc_deferred_scaling(host);
+		mmc_clk_scaling_start_busy(host, true);
+	}
+
 	__mmc_start_request(host, mrq);
 
 	return 0;
 }
 
+static void mmc_start_cmdq_request(struct mmc_host *host,
+				   struct mmc_request *mrq)
+{
+	if (mrq->data) {
+		pr_debug("%s:     blksz %d blocks %d flags %08x tsac %lu ms nsac %d\n",
+			mmc_hostname(host), mrq->data->blksz,
+			mrq->data->blocks, mrq->data->flags,
+			mrq->data->timeout_ns / NSEC_PER_MSEC,
+			mrq->data->timeout_clks);
+
+		BUG_ON(mrq->data->blksz > host->max_blk_size);
+		BUG_ON(mrq->data->blocks > host->max_blk_count);
+		BUG_ON(mrq->data->blocks * mrq->data->blksz >
+			host->max_req_size);
+		mrq->data->error = 0;
+		mrq->data->mrq = mrq;
+	}
+
+	if (mrq->cmd) {
+		mrq->cmd->error = 0;
+		mrq->cmd->mrq = mrq;
+	}
+
+	mmc_host_clk_hold(host);
+	if (likely(host->cmdq_ops->request))
+		host->cmdq_ops->request(host, mrq);
+	else
+		pr_err("%s: %s: issue request failed\n", mmc_hostname(host),
+				__func__);
+}
+
 /**
- *	mmc_start_bkops - start BKOPS for supported cards
+ *	mmc_blk_init_bkops_statistics - initialize bkops statistics
  *	@card: MMC card to start BKOPS
- *	@form_exception: A flag to indicate if this function was
- *			 called due to an exception raised by the card
  *
- *	Start background operations whenever requested.
- *	When the urgent BKOPS bit is set in a R1 command response
- *	then background operations should be started immediately.
+ *	Initialize and enable the bkops statistics
+ */
+void mmc_blk_init_bkops_statistics(struct mmc_card *card)
+{
+	int i;
+	struct mmc_bkops_stats *stats;
+
+	if (!card)
+		return;
+
+	stats = &card->bkops.stats;
+	spin_lock(&stats->lock);
+
+	stats->manual_start = 0;
+	stats->hpi = 0;
+	stats->auto_start = 0;
+	stats->auto_stop = 0;
+	for (i = 0 ; i < MMC_BKOPS_NUM_SEVERITY_LEVELS ; i++)
+		stats->level[i] = 0;
+	stats->enabled = true;
+
+	spin_unlock(&stats->lock);
+}
+EXPORT_SYMBOL(mmc_blk_init_bkops_statistics);
+
+static void mmc_update_bkops_hpi(struct mmc_bkops_stats *stats)
+{
+	spin_lock_irq(&stats->lock);
+	if (stats->enabled)
+		stats->hpi++;
+	spin_unlock_irq(&stats->lock);
+}
+
+static void mmc_update_bkops_start(struct mmc_bkops_stats *stats)
+{
+	spin_lock_irq(&stats->lock);
+	if (stats->enabled)
+		stats->manual_start++;
+	spin_unlock_irq(&stats->lock);
+}
+
+static void mmc_update_bkops_auto_on(struct mmc_bkops_stats *stats)
+{
+	spin_lock_irq(&stats->lock);
+	if (stats->enabled)
+		stats->auto_start++;
+	spin_unlock_irq(&stats->lock);
+}
+
+static void mmc_update_bkops_auto_off(struct mmc_bkops_stats *stats)
+{
+	spin_lock_irq(&stats->lock);
+	if (stats->enabled)
+		stats->auto_stop++;
+	spin_unlock_irq(&stats->lock);
+}
+
+static void mmc_update_bkops_level(struct mmc_bkops_stats *stats,
+					unsigned level)
+{
+	BUG_ON(level >= MMC_BKOPS_NUM_SEVERITY_LEVELS);
+	spin_lock_irq(&stats->lock);
+	if (stats->enabled)
+		stats->level[level]++;
+	spin_unlock_irq(&stats->lock);
+}
+
+/**
+ *	mmc_set_auto_bkops - set auto BKOPS for supported cards
+ *	@card: MMC card to start BKOPS
+ *	@enable: enable/disable flag
+ *	Configure the card to run automatic BKOPS.
+ *
+ *	Should be called when host is claimed.
 */
-void mmc_start_bkops(struct mmc_card *card, bool from_exception)
+int mmc_set_auto_bkops(struct mmc_card *card, bool enable)
+{
+	int ret = 0;
+	u8 bkops_en;
+
+	BUG_ON(!card);
+	enable = !!enable;
+
+	if (unlikely(!mmc_card_support_auto_bkops(card))) {
+		pr_err("%s: %s: card doesn't support auto bkops\n",
+				mmc_hostname(card->host), __func__);
+		return -EPERM;
+	}
+
+	if (enable) {
+		if (mmc_card_doing_auto_bkops(card))
+			goto out;
+		bkops_en = card->ext_csd.bkops_en | EXT_CSD_BKOPS_AUTO_EN;
+	} else {
+		if (!mmc_card_doing_auto_bkops(card))
+			goto out;
+		bkops_en = card->ext_csd.bkops_en & ~EXT_CSD_BKOPS_AUTO_EN;
+	}
+
+	ret = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_BKOPS_EN,
+			bkops_en, 0);
+	if (ret) {
+		pr_err("%s: %s: error in setting auto bkops to %d (%d)\n",
+			mmc_hostname(card->host), __func__, enable, ret);
+	} else {
+		if (enable) {
+			mmc_card_set_auto_bkops(card);
+			mmc_update_bkops_auto_on(&card->bkops.stats);
+		} else {
+			mmc_card_clr_auto_bkops(card);
+			mmc_update_bkops_auto_off(&card->bkops.stats);
+		}
+		card->ext_csd.bkops_en = bkops_en;
+		pr_debug("%s: %s: bkops state %x\n",
+				mmc_hostname(card->host), __func__, bkops_en);
+	}
+out:
+	return ret;
+}
+EXPORT_SYMBOL(mmc_set_auto_bkops);
+
+/**
+ *	mmc_check_bkops - check BKOPS for supported cards
+ *	@card: MMC card to check BKOPS
+ *
+ *	Read the BKOPS status in order to determine whether the
+ *	card requires bkops to be started.
+ */
+void mmc_check_bkops(struct mmc_card *card)
 {
 	int err;
-	int timeout;
-	bool use_busy_signal;
 
 	BUG_ON(!card);
 
-	if (!card->ext_csd.man_bkops_en || mmc_card_doing_bkops(card))
+	if (mmc_card_doing_bkops(card))
 		return;
 
 	err = mmc_read_bkops_status(card);
@@ -377,47 +1303,50 @@
 		return;
 	}
 
-	if (!card->ext_csd.raw_bkops_status)
+	card->bkops.needs_check = false;
+
+	mmc_update_bkops_level(&card->bkops.stats,
+				card->ext_csd.raw_bkops_status);
+
+	card->bkops.needs_bkops = card->ext_csd.raw_bkops_status > 0;
+}
+EXPORT_SYMBOL(mmc_check_bkops);
+
+/**
+ *	mmc_start_manual_bkops - start BKOPS for supported cards
+ *	@card: MMC card to start BKOPS
+ *
+ *      Send START_BKOPS to the card.
+ *      The function should be called with claimed host.
+ */
+void mmc_start_manual_bkops(struct mmc_card *card)
+{
+	int err;
+
+	BUG_ON(!card);
+
+	if (unlikely(!mmc_card_configured_manual_bkops(card)))
 		return;
 
-	if (card->ext_csd.raw_bkops_status < EXT_CSD_BKOPS_LEVEL_2 &&
-	    from_exception)
+	if (mmc_card_doing_bkops(card))
 		return;
 
-	mmc_claim_host(card->host);
-	if (card->ext_csd.raw_bkops_status >= EXT_CSD_BKOPS_LEVEL_2) {
-		timeout = MMC_BKOPS_MAX_TIMEOUT;
-		use_busy_signal = true;
-	} else {
-		timeout = 0;
-		use_busy_signal = false;
-	}
-
 	mmc_retune_hold(card->host);
 
-	err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
-			EXT_CSD_BKOPS_START, 1, timeout,
-			use_busy_signal, true, false);
+	err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_BKOPS_START,
+				1, 0, false, true, false);
 	if (err) {
-		pr_warn("%s: Error %d starting bkops\n",
-			mmc_hostname(card->host), err);
-		mmc_retune_release(card->host);
-		goto out;
+		pr_err("%s: Error %d starting manual bkops\n",
+				mmc_hostname(card->host), err);
+	} else {
+		mmc_card_set_doing_bkops(card);
+		mmc_update_bkops_start(&card->bkops.stats);
+		card->bkops.needs_bkops = false;
 	}
 
-	/*
-	 * For urgent bkops status (LEVEL_2 and more)
-	 * bkops executed synchronously, otherwise
-	 * the operation is in progress
-	 */
-	if (!use_busy_signal)
-		mmc_card_set_doing_bkops(card);
-	else
-		mmc_retune_release(card->host);
-out:
-	mmc_release_host(card->host);
+	mmc_retune_release(card->host);
 }
-EXPORT_SYMBOL(mmc_start_bkops);
+EXPORT_SYMBOL(mmc_start_manual_bkops);
 
 /*
  * mmc_wait_data_done() - done callback for data request
@@ -427,10 +1356,13 @@
  */
 static void mmc_wait_data_done(struct mmc_request *mrq)
 {
+	unsigned long flags;
 	struct mmc_context_info *context_info = &mrq->host->context_info;
 
+	spin_lock_irqsave(&context_info->lock, flags);
 	context_info->is_done_rcv = true;
 	wake_up_interruptible(&context_info->wait);
+	spin_unlock_irqrestore(&context_info->lock, flags);
 }
 
 static void mmc_wait_done(struct mmc_request *mrq)
@@ -520,6 +1452,7 @@
 	struct mmc_command *cmd;
 	struct mmc_context_info *context_info = &host->context_info;
 	int err;
+	bool is_done_rcv = false;
 	unsigned long flags;
 
 	while (1) {
@@ -527,9 +1460,10 @@
 				(context_info->is_done_rcv ||
 				 context_info->is_new_req));
 		spin_lock_irqsave(&context_info->lock, flags);
+		is_done_rcv = context_info->is_done_rcv;
 		context_info->is_waiting_last_req = false;
 		spin_unlock_irqrestore(&context_info->lock, flags);
-		if (context_info->is_done_rcv) {
+		if (is_done_rcv) {
 			context_info->is_done_rcv = false;
 			context_info->is_new_req = false;
 			cmd = mrq->cmd;
@@ -564,20 +1498,20 @@
 	struct mmc_command *cmd;
 
 	while (1) {
-		wait_for_completion(&mrq->completion);
+		wait_for_completion_io(&mrq->completion);
 
 		cmd = mrq->cmd;
 
 		/*
-		 * If host has timed out waiting for the sanitize
+		 * If host has timed out waiting for the sanitize/bkops
 		 * to complete, card might be still in programming state
 		 * so let's try to bring the card out of programming
 		 * state.
 		 */
-		if (cmd->sanitize_busy && cmd->error == -ETIMEDOUT) {
+		if ((cmd->bkops_busy || cmd->sanitize_busy) && cmd->error == -ETIMEDOUT) {
 			if (!mmc_interrupt_hpi(host->card)) {
-				pr_warn("%s: %s: Interrupted sanitize\n",
-					mmc_hostname(host), __func__);
+				pr_warn("%s: %s: Interrupted sanitize/bkops\n",
+					   mmc_hostname(host), __func__);
 				cmd->error = 0;
 				break;
 			} else {
@@ -664,6 +1598,134 @@
 }
 
 /**
+ *	mmc_cmdq_discard_card_queue - discard the task[s] in the device
+ *	@host: host instance
+ *	@tasks: mask of tasks to be knocked off
+ *		0: remove all queued tasks
+ */
+int mmc_cmdq_discard_queue(struct mmc_host *host, u32 tasks)
+{
+	return mmc_discard_queue(host, tasks);
+}
+EXPORT_SYMBOL(mmc_cmdq_discard_queue);
+
+
+/**
+ *	mmc_cmdq_post_req - post process of a completed request
+ *	@host: host instance
+ *	@tag: the request tag.
+ *	@err: non-zero is error, success otherwise
+ */
+void mmc_cmdq_post_req(struct mmc_host *host, int tag, int err)
+{
+	if (likely(host->cmdq_ops->post_req))
+		host->cmdq_ops->post_req(host, tag, err);
+}
+EXPORT_SYMBOL(mmc_cmdq_post_req);
+
+/**
+ *	mmc_cmdq_halt - halt/un-halt the command queue engine
+ *	@host: host instance
+ *	@halt: true - halt, un-halt otherwise
+ *
+ *	Host halts the command queue engine. It should complete
+ *	the ongoing transfer and release the bus.
+ *	All legacy commands can be sent upon successful
+ *	completion of this function.
+ *	Returns 0 on success, negative otherwise
+ */
+int mmc_cmdq_halt(struct mmc_host *host, bool halt)
+{
+	int err = 0;
+
+	if (mmc_host_cq_disable(host)) {
+		pr_debug("%s: %s: CQE is already disabled\n",
+				mmc_hostname(host), __func__);
+		return 0;
+	}
+
+	if ((halt && mmc_host_halt(host)) ||
+			(!halt && !mmc_host_halt(host))) {
+		pr_debug("%s: %s: CQE is already %s\n", mmc_hostname(host),
+				__func__, halt ? "halted" : "un-halted");
+		return 0;
+	}
+
+	mmc_host_clk_hold(host);
+	if (host->cmdq_ops->halt) {
+		err = host->cmdq_ops->halt(host, halt);
+		if (!err && host->ops->notify_halt)
+			host->ops->notify_halt(host, halt);
+		if (!err && halt)
+			mmc_host_set_halt(host);
+		else if (!err && !halt) {
+			mmc_host_clr_halt(host);
+			wake_up(&host->cmdq_ctx.wait);
+		}
+	} else {
+		err = -ENOSYS;
+	}
+	mmc_host_clk_release(host);
+	return err;
+}
+EXPORT_SYMBOL(mmc_cmdq_halt);
+
+int mmc_cmdq_start_req(struct mmc_host *host, struct mmc_cmdq_req *cmdq_req)
+{
+	struct mmc_request *mrq = &cmdq_req->mrq;
+
+	mrq->host = host;
+	if (mmc_card_removed(host->card)) {
+		mrq->cmd->error = -ENOMEDIUM;
+		return -ENOMEDIUM;
+	}
+	mmc_start_cmdq_request(host, mrq);
+	return 0;
+}
+EXPORT_SYMBOL(mmc_cmdq_start_req);
+
+static void mmc_cmdq_dcmd_req_done(struct mmc_request *mrq)
+{
+	mmc_host_clk_release(mrq->host);
+	complete(&mrq->completion);
+}
+
+int mmc_cmdq_wait_for_dcmd(struct mmc_host *host,
+			struct mmc_cmdq_req *cmdq_req)
+{
+	struct mmc_request *mrq = &cmdq_req->mrq;
+	struct mmc_command *cmd = mrq->cmd;
+	int err = 0;
+
+	init_completion(&mrq->completion);
+	mrq->done = mmc_cmdq_dcmd_req_done;
+	err = mmc_cmdq_start_req(host, cmdq_req);
+	if (err)
+		return err;
+
+	wait_for_completion_io(&mrq->completion);
+	if (cmd->error) {
+		pr_err("%s: DCMD %d failed with err %d\n",
+				mmc_hostname(host), cmd->opcode,
+				cmd->error);
+		err = cmd->error;
+		mmc_host_clk_hold(host);
+		host->cmdq_ops->dumpstate(host);
+		mmc_host_clk_release(host);
+	}
+	return err;
+}
+EXPORT_SYMBOL(mmc_cmdq_wait_for_dcmd);
+
+int mmc_cmdq_prepare_flush(struct mmc_command *cmd)
+{
+	return   __mmc_switch_cmdq_mode(cmd, EXT_CSD_CMD_SET_NORMAL,
+				     EXT_CSD_FLUSH_CACHE, 1,
+				     0, true, true);
+}
+EXPORT_SYMBOL(mmc_cmdq_prepare_flush);
+
+/**
  *	mmc_start_req - start a non-blocking request
  *	@host: MMC host to start command
  *	@areq: async request to start
@@ -713,7 +1775,7 @@
 			if (areq)
 				mmc_post_req(host, areq->mrq, -EINVAL);
 
-			mmc_start_bkops(host->card, true);
+			mmc_check_bkops(host->card);
 
 			/* prepare the request again */
 			if (areq)
@@ -735,8 +1797,7 @@
 	if (host->areq)
 		mmc_post_req(host, host->areq->mrq, 0);
 
-	 /* Cancel a prepared request if it was not started. */
-	if ((err || start_err) && areq)
+	if (err && areq)
 		mmc_post_req(host, areq->mrq, -EINVAL);
 
 	if (err)
@@ -764,6 +1825,10 @@
  */
 void mmc_wait_for_req(struct mmc_host *host, struct mmc_request *mrq)
 {
+#ifdef CONFIG_MMC_BLOCK_DEFERRED_RESUME
+	if (mmc_bus_needs_resume(host))
+		mmc_resume_bus(host);
+#endif
 	__mmc_start_req(host, mrq);
 
 	if (!mrq->cap_cmd_during_tfr)
@@ -819,8 +1884,6 @@
 	}
 
 	err = mmc_send_hpi_cmd(card, &status);
-	if (err)
-		goto out;
 
 	prg_wait = jiffies + msecs_to_jiffies(card->ext_csd.out_of_int_time);
 	do {
@@ -828,8 +1891,13 @@
 
 		if (!err && R1_CURRENT_STATE(status) == R1_STATE_TRAN)
 			break;
-		if (time_after(jiffies, prg_wait))
-			err = -ETIMEDOUT;
+		if (time_after(jiffies, prg_wait)) {
+			err = mmc_send_status(card, &status);
+			if (!err && R1_CURRENT_STATE(status) != R1_STATE_TRAN)
+				err = -ETIMEDOUT;
+			else
+				break;
+		}
 	} while (!err);
 
 out:
@@ -881,6 +1949,11 @@
 	int err = 0;
 
 	BUG_ON(!card);
+	if (unlikely(!mmc_card_configured_manual_bkops(card)))
+		goto out;
+	if (!mmc_card_doing_bkops(card))
+		goto out;
+
 	err = mmc_interrupt_hpi(card);
 
 	/*
@@ -889,10 +1962,11 @@
 	 */
 	if (!err || (err == -EINVAL)) {
 		mmc_card_clr_doing_bkops(card);
+		mmc_update_bkops_hpi(&card->bkops.stats);
 		mmc_retune_release(card->host);
 		err = 0;
 	}
-
+out:
 	return err;
 }
 EXPORT_SYMBOL(mmc_stop_bkops);
@@ -908,8 +1982,14 @@
 	if (err)
 		return err;
 
-	card->ext_csd.raw_bkops_status = ext_csd[EXT_CSD_BKOPS_STATUS];
-	card->ext_csd.raw_exception_status = ext_csd[EXT_CSD_EXP_EVENTS_STATUS];
+	card->ext_csd.raw_bkops_status = ext_csd[EXT_CSD_BKOPS_STATUS] &
+		MMC_BKOPS_URGENCY_MASK;
+	card->ext_csd.raw_exception_status =
+		ext_csd[EXT_CSD_EXP_EVENTS_STATUS] & (EXT_CSD_URGENT_BKOPS |
+						      EXT_CSD_DYNCAP_NEEDED |
+						      EXT_CSD_SYSPOOL_EXHAUSTED
+						      | EXT_CSD_PACKED_FAILURE);
+
 	kfree(ext_csd);
 	return 0;
 }
@@ -927,6 +2007,10 @@
 {
 	unsigned int mult;
 
+	if (!card) {
+		WARN_ON(1);
+		return;
+	}
 	/*
 	 * SDIO cards only define an upper 1 s limit on access.
 	 */
@@ -993,9 +2077,11 @@
 	 * Address this by setting the read timeout to a "reasonably high"
 	 * value. For the cards tested, 600ms has proven enough. If necessary,
 	 * this value can be increased if other problematic cards require this.
+	 * Certain Hynix 5.x cards giving read timeout even with 300ms.
+	 * Increasing further to max value (4s).
 	 */
 	if (mmc_card_long_read_time(card) && data->flags & MMC_DATA_READ) {
-		data->timeout_ns = 600000000;
+		data->timeout_ns = 4000000000u;
 		data->timeout_clks = 0;
 	}
 
@@ -1014,6 +2100,11 @@
 				data->timeout_ns =  100000000;	/* 100ms */
 		}
 	}
+	/* Increase the timeout values for some bad INAND MCP devices */
+	if (card->quirks & MMC_QUIRK_INAND_DATA_TIMEOUT) {
+		data->timeout_ns = 4000000000u; /* 4s */
+		data->timeout_clks = 0;
+	}
 }
 EXPORT_SYMBOL(mmc_set_data_timeout);
 
@@ -1064,6 +2155,7 @@
 	might_sleep();
 
 	add_wait_queue(&host->wq, &wait);
+
 	spin_lock_irqsave(&host->lock, flags);
 	while (1) {
 		set_current_state(TASK_UNINTERRUPTIBLE);
@@ -1135,9 +2227,14 @@
 {
 	pm_runtime_get_sync(&card->dev);
 	mmc_claim_host(card->host);
+#ifdef CONFIG_MMC_BLOCK_DEFERRED_RESUME
+	if (mmc_bus_needs_resume(card->host))
+		mmc_resume_bus(card->host);
+#endif
 }
 EXPORT_SYMBOL(mmc_get_card);
 
+
 /*
  * This is a helper function, which releases the host and drops the runtime
  * pm reference for the card device.
@@ -1154,7 +2251,7 @@
  * Internal function that does the actual ios call to the host driver,
  * optionally printing some debug output.
  */
-static inline void mmc_set_ios(struct mmc_host *host)
+void mmc_set_ios(struct mmc_host *host)
 {
 	struct mmc_ios *ios = &host->ios;
 
@@ -1167,7 +2264,21 @@
 	if (ios->clock > 0)
 		mmc_set_ungated(host);
 	host->ops->set_ios(host, ios);
+	if (ios->old_rate != ios->clock) {
+		if (likely(ios->clk_ts)) {
+			char trace_info[80];
+			snprintf(trace_info, 80,
+				"%s: freq_KHz %d --> %d | t = %d",
+				mmc_hostname(host), ios->old_rate / 1000,
+				ios->clock / 1000, jiffies_to_msecs(
+					(long)jiffies - (long)ios->clk_ts));
+			trace_mmc_clk(trace_info);
+		}
+		ios->old_rate = ios->clock;
+		ios->clk_ts = jiffies;
+	}
 }
+EXPORT_SYMBOL(mmc_set_ios);
 
 /*
  * Control chip select pin on a host.
@@ -1210,6 +2321,8 @@
 {
 	unsigned long flags;
 
+	WARN_ON(!host->ios.clock);
+
 	spin_lock_irqsave(&host->clk_lock, flags);
 	host->clk_old = host->ios.clock;
 	host->ios.clock = 0;
@@ -1232,7 +2345,7 @@
 	 * we just ignore the call.
 	 */
 	if (host->clk_old) {
-		BUG_ON(host->ios.clock);
+		WARN_ON(host->ios.clock);
 		/* This call will also set host->clk_gated to false */
 		__mmc_set_clock(host, host->clk_old);
 	}
@@ -1255,6 +2368,10 @@
 void mmc_set_ungated(struct mmc_host *host)
 {
 }
+
+void mmc_gate_clock(struct mmc_host *host)
+{
+}
 #endif
 
 int mmc_execute_tuning(struct mmc_card *card)
@@ -1315,9 +2432,10 @@
 
 	if (mmc_host_is_spi(host))
 		host->ios.chip_select = MMC_CS_HIGH;
-	else
+	else {
 		host->ios.chip_select = MMC_CS_DONTCARE;
-	host->ios.bus_mode = MMC_BUSMODE_PUSHPULL;
+		host->ios.bus_mode = MMC_BUSMODE_OPENDRAIN;
+	}
 	host->ios.bus_width = MMC_BUS_WIDTH_1;
 	host->ios.timing = MMC_TIMING_LEGACY;
 	host->ios.drv_type = 0;
@@ -1796,12 +2914,15 @@
 		pr_warn("%s: cannot verify signal voltage switch\n",
 			mmc_hostname(host));
 
-	mmc_host_clk_hold(host);
-
 	cmd.opcode = SD_SWITCH_VOLTAGE;
 	cmd.arg = 0;
 	cmd.flags = MMC_RSP_R1 | MMC_CMD_AC;
 
+	/*
+	 * Hold the clock reference so clock doesn't get auto gated during this
+	 * voltage switch sequence.
+	 */
+	mmc_host_clk_hold(host);
 	err = mmc_wait_for_cmd(host, &cmd, 0);
 	if (err)
 		goto err_command;
@@ -2057,6 +3178,40 @@
 	spin_unlock_irqrestore(&host->lock, flags);
 }
 
+int mmc_resume_bus(struct mmc_host *host)
+{
+	unsigned long flags;
+	int err = 0;
+
+	if (!mmc_bus_needs_resume(host))
+		return -EINVAL;
+
+	pr_debug("%s: Starting deferred resume\n", mmc_hostname(host));
+	spin_lock_irqsave(&host->lock, flags);
+	host->bus_resume_flags &= ~MMC_BUSRESUME_NEEDS_RESUME;
+	spin_unlock_irqrestore(&host->lock, flags);
+
+	mmc_bus_get(host);
+	if (host->bus_ops && !host->bus_dead && host->card) {
+		mmc_power_up(host, host->card->ocr);
+		BUG_ON(!host->bus_ops->resume);
+		host->bus_ops->resume(host);
+		if (mmc_card_cmdq(host->card)) {
+			err = mmc_cmdq_halt(host, false);
+			if (err)
+				pr_err("%s: %s: unhalt failed: %d\n",
+				       mmc_hostname(host), __func__, err);
+			else
+				mmc_card_clr_suspended(host->card);
+		}
+	}
+
+	mmc_bus_put(host);
+	pr_debug("%s: Deferred resume completed\n", mmc_hostname(host));
+	return 0;
+}
+EXPORT_SYMBOL(mmc_resume_bus);
+
 /*
  * Assign a mmc bus handler to a host. Only one bus handler may control a
  * host at any given time.
@@ -2288,16 +3443,9 @@
 		return mmc_mmc_erase_timeout(card, arg, qty);
 }
 
-static int mmc_do_erase(struct mmc_card *card, unsigned int from,
-			unsigned int to, unsigned int arg)
+static u32 mmc_get_erase_qty(struct mmc_card *card, u32 from, u32 to)
 {
-	struct mmc_command cmd = {0};
-	unsigned int qty = 0, busy_timeout = 0;
-	bool use_r1b_resp = false;
-	unsigned long timeout;
-	int err;
-
-	mmc_retune_hold(card->host);
+	u32 qty = 0;
 
 	/*
 	 * qty is used to calculate the erase timeout which depends on how many
@@ -2323,12 +3471,120 @@
 	else
 		qty += ((to / card->erase_size) -
 			(from / card->erase_size)) + 1;
+	return qty;
+}
+
+static int mmc_cmdq_send_erase_cmd(struct mmc_cmdq_req *cmdq_req,
+		struct mmc_card *card, u32 opcode, u32 arg, u32 qty)
+{
+	struct mmc_command *cmd = cmdq_req->mrq.cmd;
+	int err;
+
+	memset(cmd, 0, sizeof(struct mmc_command));
+
+	cmd->opcode = opcode;
+	cmd->arg = arg;
+	if (cmd->opcode == MMC_ERASE) {
+		cmd->flags = MMC_RSP_SPI_R1B | MMC_RSP_R1B | MMC_CMD_AC;
+		cmd->busy_timeout = mmc_erase_timeout(card, arg, qty);
+	} else {
+		cmd->flags = MMC_RSP_SPI_R1 | MMC_RSP_R1 | MMC_CMD_AC;
+	}
+
+	err = mmc_cmdq_wait_for_dcmd(card->host, cmdq_req);
+	if (err) {
+		pr_err("mmc_erase: group start error %d, status %#x\n",
+				err, cmd->resp[0]);
+		return -EIO;
+	}
+	return 0;
+}
+
+static int mmc_cmdq_do_erase(struct mmc_cmdq_req *cmdq_req,
+			struct mmc_card *card, unsigned int from,
+			unsigned int to, unsigned int arg)
+{
+	struct mmc_command *cmd = cmdq_req->mrq.cmd;
+	unsigned int qty = 0;
+	unsigned long timeout;
+	unsigned int fr, nr;
+	int err;
+
+	fr = from;
+	nr = to - from + 1;
+
+	qty = mmc_get_erase_qty(card, from, to);
 
 	if (!mmc_card_blockaddr(card)) {
 		from <<= 9;
 		to <<= 9;
 	}
 
+	err = mmc_cmdq_send_erase_cmd(cmdq_req, card, MMC_ERASE_GROUP_START,
+			from, qty);
+	if (err)
+		goto out;
+
+	err = mmc_cmdq_send_erase_cmd(cmdq_req, card, MMC_ERASE_GROUP_END,
+			to, qty);
+	if (err)
+		goto out;
+
+	err = mmc_cmdq_send_erase_cmd(cmdq_req, card, MMC_ERASE,
+			arg, qty);
+	if (err)
+		goto out;
+
+	timeout = jiffies + msecs_to_jiffies(MMC_CORE_TIMEOUT_MS);
+	do {
+		memset(cmd, 0, sizeof(struct mmc_command));
+		cmd->opcode = MMC_SEND_STATUS;
+		cmd->arg = card->rca << 16;
+		cmd->flags = MMC_RSP_R1 | MMC_CMD_AC;
+		/* Do not retry else we can't see errors */
+		err = mmc_cmdq_wait_for_dcmd(card->host, cmdq_req);
+		if (err || (cmd->resp[0] & 0xFDF92000)) {
+			pr_err("error %d requesting status %#x\n",
+				err, cmd->resp[0]);
+			err = -EIO;
+			goto out;
+		}
+		/* Timeout if the device never becomes ready for data and
+		 * never leaves the program state.
+		 */
+		if (time_after(jiffies, timeout)) {
+			pr_err("%s: Card stuck in programming state! %s\n",
+				mmc_hostname(card->host), __func__);
+			err =  -EIO;
+			goto out;
+		}
+	} while (!(cmd->resp[0] & R1_READY_FOR_DATA) ||
+		 (R1_CURRENT_STATE(cmd->resp[0]) == R1_STATE_PRG));
+out:
+	return err;
+}
+
+static int mmc_do_erase(struct mmc_card *card, unsigned int from,
+			unsigned int to, unsigned int arg)
+{
+	struct mmc_command cmd = {0};
+	unsigned int qty = 0, busy_timeout = 0;
+	bool use_r1b_resp = false;
+	unsigned long timeout;
+	unsigned int fr, nr;
+	int err;
+
+	fr = from;
+	nr = to - from + 1;
+
+	qty = mmc_get_erase_qty(card, from, to);
+
+	if (!mmc_card_blockaddr(card)) {
+		from <<= 9;
+		to <<= 9;
+	}
+
+	mmc_retune_hold(card->host);
 	if (mmc_card_sd(card))
 		cmd.opcode = SD_ERASE_WR_BLK_START;
 	else
@@ -2475,20 +3731,9 @@
 	return nr_new;
 }
 
-/**
- * mmc_erase - erase sectors.
- * @card: card to erase
- * @from: first sector to erase
- * @nr: number of sectors to erase
- * @arg: erase command argument (SD supports only %MMC_ERASE_ARG)
- *
- * Caller must claim host before calling this function.
- */
-int mmc_erase(struct mmc_card *card, unsigned int from, unsigned int nr,
-	      unsigned int arg)
+int mmc_erase_sanity_check(struct mmc_card *card, unsigned int from,
+		unsigned int nr, unsigned int arg)
 {
-	unsigned int rem, to = from + nr;
-	int err;
 
 	if (!(card->host->caps & MMC_CAP_ERASE) ||
 	    !(card->csd.cmdclass & CCC_ERASE))
@@ -2512,6 +3757,68 @@
 		if (from % card->erase_size || nr % card->erase_size)
 			return -EINVAL;
 	}
+	return 0;
+}
+
+int mmc_cmdq_erase(struct mmc_cmdq_req *cmdq_req,
+	      struct mmc_card *card, unsigned int from, unsigned int nr,
+	      unsigned int arg)
+{
+	unsigned int rem, to = from + nr;
+	int ret;
+
+	ret = mmc_erase_sanity_check(card, from, nr, arg);
+	if (ret)
+		return ret;
+
+	if (arg == MMC_ERASE_ARG) {
+		rem = from % card->erase_size;
+		if (rem) {
+			rem = card->erase_size - rem;
+			from += rem;
+			if (nr > rem)
+				nr -= rem;
+			else
+				return 0;
+		}
+		rem = nr % card->erase_size;
+		if (rem)
+			nr -= rem;
+	}
+
+	if (nr == 0)
+		return 0;
+
+	to = from + nr;
+
+	if (to <= from)
+		return -EINVAL;
+
+	/* 'from' and 'to' are inclusive */
+	to -= 1;
+
+	return mmc_cmdq_do_erase(cmdq_req, card, from, to, arg);
+}
+EXPORT_SYMBOL(mmc_cmdq_erase);
+
+/**
+ * mmc_erase - erase sectors.
+ * @card: card to erase
+ * @from: first sector to erase
+ * @nr: number of sectors to erase
+ * @arg: erase command argument (SD supports only %MMC_ERASE_ARG)
+ *
+ * Caller must claim host before calling this function.
+ */
+int mmc_erase(struct mmc_card *card, unsigned int from, unsigned int nr,
+	      unsigned int arg)
+{
+	unsigned int rem, to = from + nr;
+	int ret;
+
+	ret = mmc_erase_sanity_check(card, from, nr, arg);
+	if (ret)
+		return ret;
 
 	if (arg == MMC_ERASE_ARG)
 		nr = mmc_align_erase_size(card, &from, &to, nr);
@@ -2535,10 +3842,10 @@
 	 */
 	rem = card->erase_size - (from % card->erase_size);
 	if ((arg & MMC_TRIM_ARGS) && (card->eg_boundary) && (nr > rem)) {
-		err = mmc_do_erase(card, from, from + rem - 1, arg);
+		ret = mmc_do_erase(card, from, from + rem - 1, arg);
 		from += rem;
-		if ((err) || (to <= from))
-			return err;
+		if ((ret) || (to <= from))
+			return ret;
 	}
 
 	return mmc_do_erase(card, from, to, arg);
@@ -2688,6 +3995,9 @@
 	struct mmc_host *host = card->host;
 	unsigned int max_discard, max_trim;
 
+	if (!host->max_busy_timeout ||
+			(host->caps2 & MMC_CAP2_MAX_DISCARD_SIZE))
+
 	/*
 	 * Without erase_group_def set, MMC erase timeout depends on clock
 	 * frequence which can change.  In that case, the best choice is
@@ -2749,6 +4059,23 @@
 	mmc_host_clk_release(host);
 }
 
+/*
+ * mmc_cmdq_hw_reset: Helper API for doing
+ * reset_all of host and reinitializing card.
+ * This must be called with mmc_claim_host
+ * acquired by the caller.
+ */
+int mmc_cmdq_hw_reset(struct mmc_host *host)
+{
+	if (!host->bus_ops->power_restore)
+	return -EOPNOTSUPP;
+
+	mmc_power_cycle(host, host->ocr_avail);
+	mmc_select_voltage(host, host->card->ocr);
+	return host->bus_ops->power_restore(host);
+}
+EXPORT_SYMBOL(mmc_cmdq_hw_reset);
+
 int mmc_hw_reset(struct mmc_host *host)
 {
 	int ret;
@@ -2889,12 +4216,16 @@
 
 void mmc_rescan(struct work_struct *work)
 {
+	unsigned long flags;
 	struct mmc_host *host =
 		container_of(work, struct mmc_host, detect.work);
-	int i;
 
-	if (host->rescan_disable)
+	spin_lock_irqsave(&host->lock, flags);
+	if (host->rescan_disable) {
+		spin_unlock_irqrestore(&host->lock, flags);
 		return;
+	}
+	spin_unlock_irqrestore(&host->lock, flags);
 
 	/* If there is a non-removable card registered, only scan once */
 	if (!mmc_card_is_removable(host) && host->rescan_entered)
@@ -2945,13 +4276,7 @@
 		mmc_release_host(host);
 		goto out;
 	}
-
-	for (i = 0; i < ARRAY_SIZE(freqs); i++) {
-		if (!mmc_rescan_try_freq(host, max(freqs[i], host->f_min)))
-			break;
-		if (freqs[i] <= host->f_min)
-			break;
-	}
++	mmc_rescan_try_freq(host, host->f_min);
 	mmc_release_host(host);
 
  out:
@@ -2961,18 +4286,18 @@
 
 void mmc_start_host(struct mmc_host *host)
 {
+	mmc_claim_host(host);
 	host->f_init = max(freqs[0], host->f_min);
 	host->rescan_disable = 0;
 	host->ios.power_mode = MMC_POWER_UNDEFINED;
 
-	mmc_claim_host(host);
 	if (host->caps2 & MMC_CAP2_NO_PRESCAN_POWERUP)
 		mmc_power_off(host);
 	else
 		mmc_power_up(host, host->ocr_avail);
-	mmc_release_host(host);
 
 	mmc_gpiod_request_cd_irq(host);
+	mmc_release_host(host);
 	_mmc_detect_change(host, 0, false);
 }
 
@@ -3055,7 +4380,9 @@
 	}
 
 	mmc_power_up(host, host->card->ocr);
+	mmc_claim_host(host);
 	ret = host->bus_ops->power_restore(host);
+	mmc_release_host(host);
 
 	mmc_bus_put(host);
 
@@ -3064,6 +4391,40 @@
 EXPORT_SYMBOL(mmc_power_restore_host);
 
 /*
+ * Add barrier request to the requests in cache
+ */
+int mmc_cache_barrier(struct mmc_card *card)
+{
+	struct mmc_host *host = card->host;
+	int err = 0;
+
+	if (!card->ext_csd.cache_ctrl ||
+	     (card->quirks & MMC_QUIRK_CACHE_DISABLE))
+		goto out;
+
+	if (!mmc_card_mmc(card))
+		goto out;
+
+	if (!card->ext_csd.barrier_en)
+		return -ENOTSUPP;
+
+	/*
+	 * If a device receives maximum supported barrier
+	 * requests, a barrier command is treated as a
+	 * flush command. Hence, it is betetr to use
+	 * flush timeout instead a generic CMD6 timeout
+	 */
+	err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+			EXT_CSD_FLUSH_CACHE, 0x2, 0);
+	if (err)
+		pr_err("%s: cache barrier error %d\n",
+				mmc_hostname(host), err);
+out:
+	return err;
+}
+EXPORT_SYMBOL(mmc_cache_barrier);
+
+/*
  * Flush the cache to the non-volatile storage.
  */
 int mmc_flush_cache(struct mmc_card *card)
@@ -3072,12 +4433,23 @@
 
 	if (mmc_card_mmc(card) &&
 			(card->ext_csd.cache_size > 0) &&
-			(card->ext_csd.cache_ctrl & 1)) {
+			(card->ext_csd.cache_ctrl & 1) &&
+			(!(card->quirks & MMC_QUIRK_CACHE_DISABLE))) {
 		err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
 				EXT_CSD_FLUSH_CACHE, 1, 0);
-		if (err)
+		if (err == -ETIMEDOUT) {
+			pr_err("%s: cache flush timeout\n",
+					mmc_hostname(card->host));
+			err = mmc_interrupt_hpi(card);
+			if (err) {
+				pr_err("%s: mmc_interrupt_hpi() failed (%d)\n",
+						mmc_hostname(card->host), err);
+				err = -ENODEV;
+			}
+		} else if (err) {
 			pr_err("%s: cache flush error %d\n",
 					mmc_hostname(card->host), err);
+		}
 	}
 
 	return err;
@@ -3130,6 +4502,10 @@
 
 		spin_lock_irqsave(&host->lock, flags);
 		host->rescan_disable = 0;
+		if (mmc_bus_manual_resume(host)) {
+			spin_unlock_irqrestore(&host->lock, flags);
+			break;
+		}
 		spin_unlock_irqrestore(&host->lock, flags);
 		_mmc_detect_change(host, 0, false);
 
diff --git a/drivers/mmc/core/core.h b/drivers/mmc/core/core.h
index c975c7a..3f39058 100644
--- a/drivers/mmc/core/core.h
+++ b/drivers/mmc/core/core.h
@@ -15,21 +15,6 @@
 
 #define MMC_CMD_RETRIES        3
 
-struct mmc_bus_ops {
-	void (*remove)(struct mmc_host *);
-	void (*detect)(struct mmc_host *);
-	int (*pre_suspend)(struct mmc_host *);
-	int (*suspend)(struct mmc_host *);
-	int (*resume)(struct mmc_host *);
-	int (*runtime_suspend)(struct mmc_host *);
-	int (*runtime_resume)(struct mmc_host *);
-	int (*power_save)(struct mmc_host *);
-	int (*power_restore)(struct mmc_host *);
-	int (*alive)(struct mmc_host *);
-	int (*shutdown)(struct mmc_host *);
-	int (*reset)(struct mmc_host *);
-};
-
 void mmc_attach_bus(struct mmc_host *host, const struct mmc_bus_ops *ops);
 void mmc_detach_bus(struct mmc_host *host);
 
@@ -40,6 +25,8 @@
 
 void mmc_set_chip_select(struct mmc_host *host, int mode);
 void mmc_set_clock(struct mmc_host *host, unsigned int hz);
+int mmc_clk_update_freq(struct mmc_host *host,
+		unsigned long freq, enum mmc_load state);
 void mmc_gate_clock(struct mmc_host *host);
 void mmc_ungate_clock(struct mmc_host *host);
 void mmc_set_ungated(struct mmc_host *host);
@@ -62,6 +49,8 @@
 	if (ms < 1000 / HZ) {
 		cond_resched();
 		mdelay(ms);
+	} else if (ms < jiffies_to_msecs(2)) {
+		usleep_range(ms * 1000, (ms + 1) * 1000);
 	} else {
 		msleep(ms);
 	}
@@ -89,6 +78,13 @@
 
 void mmc_init_context_info(struct mmc_host *host);
 
+extern bool mmc_can_scale_clk(struct mmc_host *host);
+extern int mmc_init_clk_scaling(struct mmc_host *host);
+extern int mmc_suspend_clk_scaling(struct mmc_host *host);
+extern int mmc_resume_clk_scaling(struct mmc_host *host);
+extern int mmc_exit_clk_scaling(struct mmc_host *host);
+extern unsigned long mmc_get_max_frequency(struct mmc_host *host);
+
 int mmc_execute_tuning(struct mmc_card *card);
 int mmc_hs200_to_hs400(struct mmc_card *card);
 int mmc_hs400_to_hs200(struct mmc_card *card);
diff --git a/drivers/mmc/core/debugfs.c b/drivers/mmc/core/debugfs.c
index bf0f6ce..0d0d56f 100644
--- a/drivers/mmc/core/debugfs.c
+++ b/drivers/mmc/core/debugfs.c
@@ -15,6 +15,7 @@
 #include <linux/slab.h>
 #include <linux/stat.h>
 #include <linux/fault-inject.h>
+#include <linux/uaccess.h>
 
 #include <linux/mmc/card.h>
 #include <linux/mmc/host.h>
@@ -31,6 +32,26 @@
 #endif /* CONFIG_FAIL_MMC_REQUEST */
 
 /* The debugfs functions are optimized away when CONFIG_DEBUG_FS isn't set. */
+static int mmc_ring_buffer_show(struct seq_file *s, void *data)
+{
+	struct mmc_host *mmc = s->private;
+
+	mmc_dump_trace_buffer(mmc, s);
+	return 0;
+}
+
+static int mmc_ring_buffer_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, mmc_ring_buffer_show, inode->i_private);
+}
+
+static const struct file_operations mmc_ring_buffer_fops = {
+	.open		= mmc_ring_buffer_open,
+	.read		= seq_read,
+	.llseek		= seq_lseek,
+	.release	= single_release,
+};
+
 static int mmc_ios_show(struct seq_file *s, void *data)
 {
 	static const char *vdd_str[] = {
@@ -234,6 +255,100 @@
 DEFINE_SIMPLE_ATTRIBUTE(mmc_clock_fops, mmc_clock_opt_get, mmc_clock_opt_set,
 	"%llu\n");
 
+#include <linux/delay.h>
+
+static int mmc_scale_get(void *data, u64 *val)
+{
+	struct mmc_host *host = data;
+
+	*val = host->clk_scaling.curr_freq;
+
+	return 0;
+}
+
+static int mmc_scale_set(void *data, u64 val)
+{
+	int err = 0;
+	struct mmc_host *host = data;
+
+	mmc_claim_host(host);
+	mmc_host_clk_hold(host);
+
+	/* change frequency from sysfs manually */
+	err = mmc_clk_update_freq(host, val, host->clk_scaling.state);
+	if (err == -EAGAIN)
+		err = 0;
+	else if (err)
+		pr_err("%s: clock scale to %llu failed with error %d\n",
+			mmc_hostname(host), val, err);
+	else
+		pr_debug("%s: clock change to %llu finished successfully (%s)\n",
+			mmc_hostname(host), val, current->comm);
+
+	mmc_host_clk_release(host);
+	mmc_release_host(host);
+
+	return err;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(mmc_scale_fops, mmc_scale_get, mmc_scale_set,
+	"%llu\n");
+
+static int mmc_max_clock_get(void *data, u64 *val)
+{
+	struct mmc_host *host = data;
+
+	if (!host)
+		return -EINVAL;
+
+	*val = host->f_max;
+
+	return 0;
+}
+
+static int mmc_max_clock_set(void *data, u64 val)
+{
+	struct mmc_host *host = data;
+	int err = -EINVAL;
+	unsigned long freq = val;
+	unsigned int old_freq;
+
+	if (!host || (val < host->f_min))
+		goto out;
+
+	mmc_claim_host(host);
+	if (host->bus_ops && host->bus_ops->change_bus_speed) {
+		old_freq = host->f_max;
+		host->f_max = freq;
+
+		err = host->bus_ops->change_bus_speed(host, &freq);
+
+		if (err)
+			host->f_max = old_freq;
+	}
+	mmc_release_host(host);
+out:
+	return err;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(mmc_max_clock_fops, mmc_max_clock_get,
+		mmc_max_clock_set, "%llu\n");
+
+static int mmc_force_err_set(void *data, u64 val)
+{
+	struct mmc_host *host = data;
+
+	if (host && host->ops && host->ops->force_err_irq) {
+		mmc_host_clk_hold(host);
+		host->ops->force_err_irq(host, val);
+		mmc_host_clk_release(host);
+	}
+
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(mmc_force_err_fops, NULL, mmc_force_err_set, "%llu\n");
+
 void mmc_add_host_debugfs(struct mmc_host *host)
 {
 	struct dentry *root;
@@ -256,6 +371,29 @@
 			&mmc_clock_fops))
 		goto err_node;
 
+	if (!debugfs_create_file("max_clock", S_IRUSR | S_IWUSR, root, host,
+		&mmc_max_clock_fops))
+		goto err_node;
+
+	if (!debugfs_create_file("scale", S_IRUSR | S_IWUSR, root, host,
+		&mmc_scale_fops))
+		goto err_node;
+
+	if (!debugfs_create_bool("skip_clk_scale_freq_update",
+		S_IRUSR | S_IWUSR, root,
+		&host->clk_scaling.skip_clk_scale_freq_update))
+		goto err_node;
+
+	if (!debugfs_create_bool("cmdq_task_history",
+		S_IRUSR | S_IWUSR, root,
+		&host->cmdq_thist_enabled))
+		goto err_node;
+
+#ifdef CONFIG_MMC_RING_BUFFER
+	if (!debugfs_create_file("ring_buffer", S_IRUSR,
+				root, host, &mmc_ring_buffer_fops))
+		goto err_node;
+#endif
 #ifdef CONFIG_MMC_CLKGATE
 	if (!debugfs_create_u32("clk_delay", (S_IRUSR | S_IWUSR),
 				root, &host->clk_delay))
@@ -270,6 +408,10 @@
 					     &host->fail_mmc_request)))
 		goto err_node;
 #endif
+	if (!debugfs_create_file("force_error", S_IWUSR, root, host,
+		&mmc_force_err_fops))
+		goto err_node;
+
 	return;
 
 err_node:
@@ -291,11 +433,26 @@
 	int		ret;
 
 	mmc_get_card(card);
+	if (mmc_card_cmdq(card)) {
+		ret = mmc_cmdq_halt_on_empty_queue(card->host);
+		if (ret) {
+			pr_err("%s: halt failed while doing %s err (%d)\n",
+					mmc_hostname(card->host), __func__,
+					ret);
+			goto out;
+		}
+	}
 
 	ret = mmc_send_status(data, &status);
 	if (!ret)
 		*val = status;
 
+	if (mmc_card_cmdq(card)) {
+		if (mmc_cmdq_halt(card->host, false))
+			pr_err("%s: %s: cmdq unhalt failed\n",
+			       mmc_hostname(card->host), __func__);
+	}
+out:
 	mmc_put_card(card);
 
 	return ret;
@@ -318,8 +475,18 @@
 		return -ENOMEM;
 
 	mmc_get_card(card);
+	if (mmc_card_cmdq(card)) {
+		err = mmc_cmdq_halt_on_empty_queue(card->host);
+		if (err) {
+			pr_err("%s: halt failed while doing %s err (%d)\n",
+					mmc_hostname(card->host), __func__,
+					err);
+			mmc_put_card(card);
+			goto out_free_halt;
+		}
+	}
+
 	err = mmc_get_ext_csd(card, &ext_csd);
-	mmc_put_card(card);
 	if (err)
 		goto out_free;
 
@@ -329,10 +496,25 @@
 	BUG_ON(n != EXT_CSD_STR_LEN);
 
 	filp->private_data = buf;
+
+	if (mmc_card_cmdq(card)) {
+		if (mmc_cmdq_halt(card->host, false))
+			pr_err("%s: %s: cmdq unhalt failed\n",
+			       mmc_hostname(card->host), __func__);
+	}
+
+	mmc_put_card(card);
 	kfree(ext_csd);
 	return 0;
 
 out_free:
+	if (mmc_card_cmdq(card)) {
+		if (mmc_cmdq_halt(card->host, false))
+			pr_err("%s: %s: cmdq unhalt failed\n",
+			       mmc_hostname(card->host), __func__);
+	}
+	mmc_put_card(card);
+out_free_halt:
 	kfree(buf);
 	return err;
 }
@@ -359,6 +541,275 @@
 	.llseek		= default_llseek,
 };
 
+static int mmc_wr_pack_stats_open(struct inode *inode, struct file *filp)
+{
+	struct mmc_card *card = inode->i_private;
+
+	filp->private_data = card;
+	card->wr_pack_stats.print_in_read = 1;
+	return 0;
+}
+
+#define TEMP_BUF_SIZE 256
+static ssize_t mmc_wr_pack_stats_read(struct file *filp, char __user *ubuf,
+				size_t cnt, loff_t *ppos)
+{
+	struct mmc_card *card = filp->private_data;
+	struct mmc_wr_pack_stats *pack_stats;
+	int i;
+	int max_num_of_packed_reqs = 0;
+	char *temp_buf;
+
+	if (!card)
+		return cnt;
+
+	if (!access_ok(VERIFY_WRITE, ubuf, cnt))
+		return cnt;
+
+	if (!card->wr_pack_stats.print_in_read)
+		return 0;
+
+	if (!card->wr_pack_stats.enabled) {
+		pr_info("%s: write packing statistics are disabled\n",
+			 mmc_hostname(card->host));
+		goto exit;
+	}
+
+	pack_stats = &card->wr_pack_stats;
+
+	if (!pack_stats->packing_events) {
+		pr_info("%s: NULL packing_events\n", mmc_hostname(card->host));
+		goto exit;
+	}
+
+	max_num_of_packed_reqs = card->ext_csd.max_packed_writes;
+
+	temp_buf = kmalloc(TEMP_BUF_SIZE, GFP_KERNEL);
+	if (!temp_buf)
+		goto exit;
+
+	spin_lock(&pack_stats->lock);
+
+	snprintf(temp_buf, TEMP_BUF_SIZE, "%s: write packing statistics:\n",
+		mmc_hostname(card->host));
+	strlcat(ubuf, temp_buf, cnt);
+
+	for (i = 1 ; i <= max_num_of_packed_reqs ; ++i) {
+		if (pack_stats->packing_events[i]) {
+			snprintf(temp_buf, TEMP_BUF_SIZE,
+				 "%s: Packed %d reqs - %d times\n",
+				mmc_hostname(card->host), i,
+				pack_stats->packing_events[i]);
+			strlcat(ubuf, temp_buf, cnt);
+		}
+	}
+
+	snprintf(temp_buf, TEMP_BUF_SIZE,
+		 "%s: stopped packing due to the following reasons:\n",
+		 mmc_hostname(card->host));
+	strlcat(ubuf, temp_buf, cnt);
+
+	if (pack_stats->pack_stop_reason[EXCEEDS_SEGMENTS]) {
+		snprintf(temp_buf, TEMP_BUF_SIZE,
+			 "%s: %d times: exceed max num of segments\n",
+			 mmc_hostname(card->host),
+			 pack_stats->pack_stop_reason[EXCEEDS_SEGMENTS]);
+		strlcat(ubuf, temp_buf, cnt);
+	}
+	if (pack_stats->pack_stop_reason[EXCEEDS_SECTORS]) {
+		snprintf(temp_buf, TEMP_BUF_SIZE,
+			 "%s: %d times: exceed max num of sectors\n",
+			mmc_hostname(card->host),
+			pack_stats->pack_stop_reason[EXCEEDS_SECTORS]);
+		strlcat(ubuf, temp_buf, cnt);
+	}
+	if (pack_stats->pack_stop_reason[WRONG_DATA_DIR]) {
+		snprintf(temp_buf, TEMP_BUF_SIZE,
+			 "%s: %d times: wrong data direction\n",
+			mmc_hostname(card->host),
+			pack_stats->pack_stop_reason[WRONG_DATA_DIR]);
+		strlcat(ubuf, temp_buf, cnt);
+	}
+	if (pack_stats->pack_stop_reason[FLUSH_OR_DISCARD]) {
+		snprintf(temp_buf, TEMP_BUF_SIZE,
+			 "%s: %d times: flush or discard\n",
+			mmc_hostname(card->host),
+			pack_stats->pack_stop_reason[FLUSH_OR_DISCARD]);
+		strlcat(ubuf, temp_buf, cnt);
+	}
+	if (pack_stats->pack_stop_reason[EMPTY_QUEUE]) {
+		snprintf(temp_buf, TEMP_BUF_SIZE,
+			 "%s: %d times: empty queue\n",
+			mmc_hostname(card->host),
+			pack_stats->pack_stop_reason[EMPTY_QUEUE]);
+		strlcat(ubuf, temp_buf, cnt);
+	}
+	if (pack_stats->pack_stop_reason[REL_WRITE]) {
+		snprintf(temp_buf, TEMP_BUF_SIZE,
+			 "%s: %d times: rel write\n",
+			mmc_hostname(card->host),
+			pack_stats->pack_stop_reason[REL_WRITE]);
+		strlcat(ubuf, temp_buf, cnt);
+	}
+	if (pack_stats->pack_stop_reason[THRESHOLD]) {
+		snprintf(temp_buf, TEMP_BUF_SIZE,
+			 "%s: %d times: Threshold\n",
+			mmc_hostname(card->host),
+			pack_stats->pack_stop_reason[THRESHOLD]);
+		strlcat(ubuf, temp_buf, cnt);
+	}
+
+	if (pack_stats->pack_stop_reason[LARGE_SEC_ALIGN]) {
+		snprintf(temp_buf, TEMP_BUF_SIZE,
+			 "%s: %d times: Large sector alignment\n",
+			mmc_hostname(card->host),
+			pack_stats->pack_stop_reason[LARGE_SEC_ALIGN]);
+		strlcat(ubuf, temp_buf, cnt);
+	}
+	if (pack_stats->pack_stop_reason[RANDOM]) {
+		snprintf(temp_buf, TEMP_BUF_SIZE,
+			 "%s: %d times: random request\n",
+			mmc_hostname(card->host),
+			pack_stats->pack_stop_reason[RANDOM]);
+		strlcat(ubuf, temp_buf, cnt);
+	}
+	if (pack_stats->pack_stop_reason[FUA]) {
+		snprintf(temp_buf, TEMP_BUF_SIZE,
+			 "%s: %d times: fua request\n",
+			mmc_hostname(card->host),
+			pack_stats->pack_stop_reason[FUA]);
+		strlcat(ubuf, temp_buf, cnt);
+	}
+
+	spin_unlock(&pack_stats->lock);
+
+	kfree(temp_buf);
+
+	pr_info("%s", ubuf);
+
+exit:
+	if (card->wr_pack_stats.print_in_read == 1) {
+		card->wr_pack_stats.print_in_read = 0;
+		return strnlen(ubuf, cnt);
+	}
+
+	return 0;
+}
+
+static ssize_t mmc_wr_pack_stats_write(struct file *filp,
+				       const char __user *ubuf, size_t cnt,
+				       loff_t *ppos)
+{
+	struct mmc_card *card = filp->private_data;
+	int value;
+
+	if (!card)
+		return cnt;
+
+	if (!access_ok(VERIFY_READ, ubuf, cnt))
+		return cnt;
+
+	sscanf(ubuf, "%d", &value);
+	if (value) {
+		mmc_blk_init_packed_statistics(card);
+	} else {
+		spin_lock(&card->wr_pack_stats.lock);
+		card->wr_pack_stats.enabled = false;
+		spin_unlock(&card->wr_pack_stats.lock);
+	}
+
+	return cnt;
+}
+
+static const struct file_operations mmc_dbg_wr_pack_stats_fops = {
+	.open		= mmc_wr_pack_stats_open,
+	.read		= mmc_wr_pack_stats_read,
+	.write		= mmc_wr_pack_stats_write,
+};
+
+static int mmc_bkops_stats_read(struct seq_file *file, void *data)
+{
+	struct mmc_card *card = file->private;
+	struct mmc_bkops_stats *stats;
+	int i;
+
+	if (!card)
+		return -EINVAL;
+
+	stats = &card->bkops.stats;
+
+	if (!stats->enabled) {
+		pr_info("%s: bkops statistics are disabled\n",
+			 mmc_hostname(card->host));
+		goto exit;
+	}
+
+	spin_lock(&stats->lock);
+
+	seq_printf(file, "%s: bkops statistics:\n",
+			mmc_hostname(card->host));
+	seq_printf(file, "%s: BKOPS: sent START_BKOPS to device: %u\n",
+			mmc_hostname(card->host), stats->manual_start);
+	seq_printf(file, "%s: BKOPS: stopped due to HPI: %u\n",
+			mmc_hostname(card->host), stats->hpi);
+	seq_printf(file, "%s: BKOPS: sent AUTO_EN set to 1: %u\n",
+			mmc_hostname(card->host), stats->auto_start);
+	seq_printf(file, "%s: BKOPS: sent AUTO_EN set to 0: %u\n",
+			mmc_hostname(card->host), stats->auto_stop);
+
+	for (i = 0 ; i < MMC_BKOPS_NUM_SEVERITY_LEVELS ; ++i)
+		seq_printf(file, "%s: BKOPS: due to level %d: %u\n",
+			 mmc_hostname(card->host), i, stats->level[i]);
+
+	spin_unlock(&stats->lock);
+
+exit:
+
+	return 0;
+}
+
+static ssize_t mmc_bkops_stats_write(struct file *filp,
+				      const char __user *ubuf, size_t cnt,
+				      loff_t *ppos)
+{
+	struct mmc_card *card = filp->f_mapping->host->i_private;
+	int value;
+	struct mmc_bkops_stats *stats;
+	int err;
+
+	if (!card)
+		return cnt;
+
+	stats = &card->bkops.stats;
+
+	err = kstrtoint_from_user(ubuf, cnt, 0, &value);
+	if (err) {
+		pr_err("%s: %s: error parsing input from user (%d)\n",
+				mmc_hostname(card->host), __func__, err);
+		return err;
+	}
+	if (value) {
+		mmc_blk_init_bkops_statistics(card);
+	} else {
+		spin_lock(&stats->lock);
+		stats->enabled = false;
+		spin_unlock(&stats->lock);
+	}
+
+	return cnt;
+}
+
+static int mmc_bkops_stats_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, mmc_bkops_stats_read, inode->i_private);
+}
+
+static const struct file_operations mmc_dbg_bkops_stats_fops = {
+	.open		= mmc_bkops_stats_open,
+	.read		= seq_read,
+	.write		= mmc_bkops_stats_write,
+};
+
 void mmc_add_card_debugfs(struct mmc_card *card)
 {
 	struct mmc_host	*host = card->host;
@@ -391,6 +842,19 @@
 					&mmc_dbg_ext_csd_fops))
 			goto err;
 
+	if (mmc_card_mmc(card) && (card->ext_csd.rev >= 6) &&
+	    (card->host->caps2 & MMC_CAP2_PACKED_WR))
+		if (!debugfs_create_file("wr_pack_stats", S_IRUSR, root, card,
+					 &mmc_dbg_wr_pack_stats_fops))
+			goto err;
+
+	if (mmc_card_mmc(card) && (card->ext_csd.rev >= 5) &&
+	    (mmc_card_configured_auto_bkops(card) ||
+	     mmc_card_configured_manual_bkops(card)))
+		if (!debugfs_create_file("bkops_stats", S_IRUSR, root, card,
+					 &mmc_dbg_bkops_stats_fops))
+			goto err;
+
 	return;
 
 err:
diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c
index e9f74a2..eb730fd 100644
--- a/drivers/mmc/core/host.c
+++ b/drivers/mmc/core/host.c
@@ -4,6 +4,7 @@
  *  Copyright (C) 2003 Russell King, All Rights Reserved.
  *  Copyright (C) 2007-2008 Pierre Ossman
  *  Copyright (C) 2010 Linus Walleij
+ *  Copyright (c) 2012, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 as
@@ -24,6 +25,8 @@
 
 #include <linux/mmc/host.h>
 #include <linux/mmc/card.h>
+#include <linux/mmc/ring_buffer.h>
+
 #include <linux/mmc/slot-gpio.h>
 
 #include "core.h"
@@ -31,6 +34,10 @@
 #include "slot-gpio.h"
 #include "pwrseq.h"
 
+#define MMC_DEVFRQ_DEFAULT_UP_THRESHOLD 35
+#define MMC_DEVFRQ_DEFAULT_DOWN_THRESHOLD 5
+#define MMC_DEVFRQ_DEFAULT_POLLING_MSEC 100
+
 static DEFINE_IDA(mmc_host_ida);
 static DEFINE_SPINLOCK(mmc_host_lock);
 
@@ -164,6 +171,7 @@
 	if (host->clk_gated) {
 		spin_unlock_irqrestore(&host->clk_lock, flags);
 		mmc_ungate_clock(host);
+
 		spin_lock_irqsave(&host->clk_lock, flags);
 		pr_debug("%s: ungated MCI clock\n", mmc_hostname(host));
 	}
@@ -176,11 +184,19 @@
  *	mmc_host_may_gate_card - check if this card may be gated
  *	@card: card to check.
  */
-static bool mmc_host_may_gate_card(struct mmc_card *card)
+bool mmc_host_may_gate_card(struct mmc_card *card)
 {
 	/* If there is no card we may gate it */
 	if (!card)
 		return true;
+
+	/*
+	 * SDIO3.0 card allows the clock to be gated off so check if
+	 * that is the case or not.
+	 */
+	if (mmc_card_sdio(card) && card->cccr.async_intr_sup)
+			return true;
+
 	/*
 	 * Don't gate SDIO cards! These need to be clocked at all times
 	 * since they may be independent systems generating interrupts
@@ -649,6 +665,217 @@
 
 EXPORT_SYMBOL(mmc_alloc_host);
 
+static ssize_t show_enable(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct mmc_host *host = cls_dev_to_mmc_host(dev);
+
+	if (!host)
+		return -EINVAL;
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", mmc_can_scale_clk(host));
+}
+
+static ssize_t store_enable(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct mmc_host *host = cls_dev_to_mmc_host(dev);
+	unsigned long value;
+
+	if (!host || !host->card || kstrtoul(buf, 0, &value))
+		return -EINVAL;
+
+	mmc_get_card(host->card);
+
+	if (!value) {
+		/*turning off clock scaling*/
+		mmc_exit_clk_scaling(host);
+		host->caps2 &= ~MMC_CAP2_CLK_SCALE;
+		host->clk_scaling.state = MMC_LOAD_HIGH;
+		/* Set to max. frequency when disabling */
+		mmc_clk_update_freq(host, host->card->clk_scaling_highest,
+					host->clk_scaling.state);
+	} else if (value) {
+		/* starting clock scaling, will restart in case started */
+		host->caps2 |= MMC_CAP2_CLK_SCALE;
+		if (host->clk_scaling.enable)
+			mmc_exit_clk_scaling(host);
+		mmc_init_clk_scaling(host);
+	}
+
+	mmc_put_card(host->card);
+
+	return count;
+}
+
+static ssize_t show_up_threshold(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct mmc_host *host = cls_dev_to_mmc_host(dev);
+
+	if (!host)
+		return -EINVAL;
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", host->clk_scaling.upthreshold);
+}
+
+#define MAX_PERCENTAGE	100
+static ssize_t store_up_threshold(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct mmc_host *host = cls_dev_to_mmc_host(dev);
+	unsigned long value;
+
+	if (!host || kstrtoul(buf, 0, &value) || (value > MAX_PERCENTAGE))
+		return -EINVAL;
+
+	host->clk_scaling.upthreshold = value;
+
+	pr_debug("%s: clkscale_up_thresh set to %lu\n",
+			mmc_hostname(host), value);
+	return count;
+}
+
+static ssize_t show_down_threshold(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct mmc_host *host = cls_dev_to_mmc_host(dev);
+
+	if (!host)
+		return -EINVAL;
+
+	return snprintf(buf, PAGE_SIZE, "%d\n",
+			host->clk_scaling.downthreshold);
+}
+
+static ssize_t store_down_threshold(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct mmc_host *host = cls_dev_to_mmc_host(dev);
+	unsigned long value;
+
+	if (!host || kstrtoul(buf, 0, &value) || (value > MAX_PERCENTAGE))
+		return -EINVAL;
+
+	host->clk_scaling.downthreshold = value;
+
+	pr_debug("%s: clkscale_down_thresh set to %lu\n",
+			mmc_hostname(host), value);
+	return count;
+}
+
+static ssize_t show_polling(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct mmc_host *host = cls_dev_to_mmc_host(dev);
+
+	if (!host)
+		return -EINVAL;
+
+	return snprintf(buf, PAGE_SIZE, "%lu milliseconds\n",
+			host->clk_scaling.polling_delay_ms);
+}
+
+static ssize_t store_polling(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct mmc_host *host = cls_dev_to_mmc_host(dev);
+	unsigned long value;
+
+	if (!host || kstrtoul(buf, 0, &value))
+		return -EINVAL;
+
+	host->clk_scaling.polling_delay_ms = value;
+
+	pr_debug("%s: clkscale_polling_delay_ms set to %lu\n",
+			mmc_hostname(host), value);
+	return count;
+}
+
+DEVICE_ATTR(enable, S_IRUGO | S_IWUSR,
+		show_enable, store_enable);
+DEVICE_ATTR(polling_interval, S_IRUGO | S_IWUSR,
+		show_polling, store_polling);
+DEVICE_ATTR(up_threshold, S_IRUGO | S_IWUSR,
+		show_up_threshold, store_up_threshold);
+DEVICE_ATTR(down_threshold, S_IRUGO | S_IWUSR,
+		show_down_threshold, store_down_threshold);
+
+static struct attribute *clk_scaling_attrs[] = {
+	&dev_attr_enable.attr,
+	&dev_attr_up_threshold.attr,
+	&dev_attr_down_threshold.attr,
+	&dev_attr_polling_interval.attr,
+	NULL,
+};
+
+static struct attribute_group clk_scaling_attr_grp = {
+	.name = "clk_scaling",
+	.attrs = clk_scaling_attrs,
+};
+
+#ifdef CONFIG_MMC_PERF_PROFILING
+static ssize_t
+show_perf(struct device *dev, struct device_attribute *attr, char *buf)
+{
+	struct mmc_host *host = cls_dev_to_mmc_host(dev);
+	int64_t rtime_drv, wtime_drv;
+	unsigned long rbytes_drv, wbytes_drv, flags;
+
+	spin_lock_irqsave(&host->lock, flags);
+
+	rbytes_drv = host->perf.rbytes_drv;
+	wbytes_drv = host->perf.wbytes_drv;
+
+	rtime_drv = ktime_to_us(host->perf.rtime_drv);
+	wtime_drv = ktime_to_us(host->perf.wtime_drv);
+
+	spin_unlock_irqrestore(&host->lock, flags);
+
+	return snprintf(buf, PAGE_SIZE, "Write performance at driver Level:"
+					"%lu bytes in %lld microseconds\n"
+					"Read performance at driver Level:"
+					"%lu bytes in %lld microseconds\n",
+					wbytes_drv, wtime_drv,
+					rbytes_drv, rtime_drv);
+}
+
+static ssize_t
+set_perf(struct device *dev, struct device_attribute *attr,
+		const char *buf, size_t count)
+{
+	struct mmc_host *host = cls_dev_to_mmc_host(dev);
+	int64_t value;
+	unsigned long flags;
+
+	sscanf(buf, "%lld", &value);
+	spin_lock_irqsave(&host->lock, flags);
+	if (!value) {
+		memset(&host->perf, 0, sizeof(host->perf));
+		host->perf_enable = false;
+	} else {
+		host->perf_enable = true;
+	}
+	spin_unlock_irqrestore(&host->lock, flags);
+
+	return count;
+}
+
+static DEVICE_ATTR(perf, S_IRUGO | S_IWUSR,
+		show_perf, set_perf);
+
+#endif
+
+static struct attribute *dev_attrs[] = {
+#ifdef CONFIG_MMC_PERF_PROFILING
+	&dev_attr_perf.attr,
+#endif
+	NULL,
+};
+static struct attribute_group dev_attr_grp = {
+	.attrs = dev_attrs,
+};
+
 /**
  *	mmc_add_host - initialise host hardware
  *	@host: mmc host
@@ -670,15 +897,31 @@
 
 	led_trigger_register_simple(dev_name(&host->class_dev), &host->led);
 
+	host->clk_scaling.upthreshold = MMC_DEVFRQ_DEFAULT_UP_THRESHOLD;
+	host->clk_scaling.downthreshold = MMC_DEVFRQ_DEFAULT_DOWN_THRESHOLD;
+	host->clk_scaling.polling_delay_ms = MMC_DEVFRQ_DEFAULT_POLLING_MSEC;
+	host->clk_scaling.skip_clk_scale_freq_update = false;
+
 #ifdef CONFIG_DEBUG_FS
 	mmc_add_host_debugfs(host);
 #endif
 	mmc_host_clk_sysfs_init(host);
+	mmc_trace_init(host);
+
+	err = sysfs_create_group(&host->class_dev.kobj, &clk_scaling_attr_grp);
+	if (err)
+		pr_err("%s: failed to create clk scale sysfs group with err %d\n",
+				__func__, err);
 
 #ifdef CONFIG_BLOCK
 	mmc_latency_hist_sysfs_init(host);
 #endif
 
+	err = sysfs_create_group(&host->class_dev.kobj, &dev_attr_grp);
+	if (err)
+		pr_err("%s: failed to create sysfs group with err %d\n",
+							 __func__, err);
+
 	mmc_start_host(host);
 	if (!(host->pm_flags & MMC_PM_IGNORE_PM_NOTIFY))
 		mmc_register_pm_notifier(host);
@@ -710,6 +953,9 @@
 	mmc_latency_hist_sysfs_exit(host);
 #endif
 
+	sysfs_remove_group(&host->parent->kobj, &dev_attr_grp);
+	sysfs_remove_group(&host->class_dev.kobj, &clk_scaling_attr_grp);
+
 	device_del(&host->class_dev);
 
 	led_trigger_unregister_simple(host->led);
diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c
index cb179fb..a36bcbb 100644
--- a/drivers/mmc/core/mmc.c
+++ b/drivers/mmc/core/mmc.c
@@ -19,6 +19,8 @@
 #include <linux/mmc/host.h>
 #include <linux/mmc/card.h>
 #include <linux/mmc/mmc.h>
+#include <linux/reboot.h>
+#include <trace/events/mmc.h>
 
 #include "core.h"
 #include "host.h"
@@ -72,6 +74,7 @@
 		__res & __mask;						\
 	})
 
+static int mmc_switch_status(struct mmc_card *card, bool ignore_crc);
 /*
  * Given the decoded CSD structure, decode the raw CID to our CID structure.
  */
@@ -137,6 +140,19 @@
 	mmc_init_erase(card);
 }
 
+static const struct mmc_fixup mmc_fixups[] = {
+
+	/* avoid HPI for specific cards */
+	MMC_FIXUP_EXT_CSD_REV("MMC16G", CID_MANFID_KINGSTON, CID_OEMID_ANY,
+		add_quirk, MMC_QUIRK_BROKEN_HPI, MMC_V4_41),
+
+	/* Disable cache for specific cards */
+	MMC_FIXUP("MMC16G", CID_MANFID_KINGSTON, CID_OEMID_ANY,
+		add_quirk_mmc, MMC_QUIRK_CACHE_DISABLE),
+
+	END_FIXUP
+};
+
 /*
  * Given a 128-bit response, decode to our card CSD structure.
  */
@@ -388,9 +404,6 @@
 	 */
 	card->ext_csd.rev = ext_csd[EXT_CSD_REV];
 
-	/* fixup device after ext_csd revision field is updated */
-	mmc_fixup_device(card, mmc_ext_csd_fixups);
-
 	card->ext_csd.raw_sectors[0] = ext_csd[EXT_CSD_SEC_CNT + 0];
 	card->ext_csd.raw_sectors[1] = ext_csd[EXT_CSD_SEC_CNT + 1];
 	card->ext_csd.raw_sectors[2] = ext_csd[EXT_CSD_SEC_CNT + 2];
@@ -516,6 +529,25 @@
 			ext_csd[EXT_CSD_PWR_CL_DDR_200_360];
 	}
 
+	/* check whether the eMMC card supports HPI */
+	if ((ext_csd[EXT_CSD_HPI_FEATURES] & 0x1) &&
+		!(card->quirks & MMC_QUIRK_BROKEN_HPI)) {
+		card->ext_csd.hpi = 1;
+		if (ext_csd[EXT_CSD_HPI_FEATURES] & 0x2)
+			card->ext_csd.hpi_cmd = MMC_STOP_TRANSMISSION;
+		else
+			card->ext_csd.hpi_cmd = MMC_SEND_STATUS;
+		/*
+		 * Indicate the maximum timeout to close
+		 * a command interrupted by HPI
+		 */
+		card->ext_csd.out_of_int_time =
+			ext_csd[EXT_CSD_OUT_OF_INTERRUPT_TIME] * 10;
+		pr_info("%s: Out-of-interrupt timeout is %d[ms]\n",
+				mmc_hostname(card->host),
+				card->ext_csd.out_of_int_time);
+	}
+
 	if (card->ext_csd.rev >= 5) {
 		/* Adjust production date as per JEDEC JESD84-B451 */
 		if (card->cid.year < 2010)
@@ -523,16 +555,16 @@
 
 		/* check whether the eMMC card supports BKOPS */
 		if (!mmc_card_broken_hpi(card) &&
-		    ext_csd[EXT_CSD_BKOPS_SUPPORT] & 0x1) {
+		    (ext_csd[EXT_CSD_BKOPS_SUPPORT] & 0x1) &&
+				card->ext_csd.hpi) {
 			card->ext_csd.bkops = 1;
-			card->ext_csd.man_bkops_en =
-					(ext_csd[EXT_CSD_BKOPS_EN] &
-						EXT_CSD_MANUAL_BKOPS_MASK);
+			card->ext_csd.bkops_en = ext_csd[EXT_CSD_BKOPS_EN];
 			card->ext_csd.raw_bkops_status =
 				ext_csd[EXT_CSD_BKOPS_STATUS];
-			if (!card->ext_csd.man_bkops_en)
-				pr_debug("%s: MAN_BKOPS_EN bit is not set\n",
-					mmc_hostname(card->host));
+			if (!card->ext_csd.bkops_en)
+				pr_info("%s: BKOPS_EN equals 0x%x\n",
+					mmc_hostname(card->host),
+					card->ext_csd.bkops_en);
 		}
 
 		/* check whether the eMMC card supports HPI */
@@ -555,6 +587,19 @@
 		card->ext_csd.rst_n_function = ext_csd[EXT_CSD_RST_N_FUNCTION];
 
 		/*
+		 * Some eMMC vendors violate eMMC 5.0 spec and set
+		 * REL_WR_SEC_C register to 0x10 to indicate the
+		 * ability of RPMB throughput improvement thus lead
+		 * to failure when TZ module write data to RPMB
+		 * partition. So check bit[4] of EXT_CSD[166] and
+		 * if it is not set then change value of REL_WR_SEC_C
+		 * to 0x1 directly ignoring value of EXT_CSD[222].
+		 */
+		if (!(card->ext_csd.rel_param &
+					EXT_CSD_WR_REL_PARAM_EN_RPMB_REL_WR))
+			card->ext_csd.rel_sectors = 0x1;
+
+		/*
 		 * RPMB regions are defined in multiples of 128K.
 		 */
 		card->ext_csd.raw_rpmb_size_mult = ext_csd[EXT_CSD_RPMB_MULT];
@@ -610,6 +655,46 @@
 		card->ext_csd.data_sector_size = 512;
 	}
 
+	if (card->ext_csd.rev >= 7) {
+		/* Enhance Strobe is supported since v5.1 which rev should be
+		 * 8 but some eMMC devices can support it with rev 7. So handle
+		 * Enhance Strobe here.
+		 */
+		card->ext_csd.strobe_support = ext_csd[EXT_CSD_STROBE_SUPPORT];
+		card->ext_csd.cmdq_support = ext_csd[EXT_CSD_CMDQ_SUPPORT];
+		card->ext_csd.fw_version = ext_csd[EXT_CSD_FIRMWARE_VERSION];
+		pr_info("%s: eMMC FW version: 0x%02x\n",
+			mmc_hostname(card->host),
+			card->ext_csd.fw_version);
+		if (card->ext_csd.cmdq_support) {
+			/*
+			 * Queue Depth = N + 1,
+			 * see JEDEC JESD84-B51 section 7.4.19
+			 */
+			card->ext_csd.cmdq_depth =
+				ext_csd[EXT_CSD_CMDQ_DEPTH] + 1;
+			pr_info("%s: CMDQ supported: depth: %d\n",
+				mmc_hostname(card->host),
+				card->ext_csd.cmdq_depth);
+		}
+		card->ext_csd.barrier_support =
+			ext_csd[EXT_CSD_BARRIER_SUPPORT];
+		card->ext_csd.cache_flush_policy =
+			ext_csd[EXT_CSD_CACHE_FLUSH_POLICY];
+		pr_info("%s: cache barrier support %d flush policy %d\n",
+				mmc_hostname(card->host),
+				card->ext_csd.barrier_support,
+				card->ext_csd.cache_flush_policy);
+		card->ext_csd.enhanced_rpmb_supported =
+			(card->ext_csd.rel_param &
+			 EXT_CSD_WR_REL_PARAM_EN_RPMB_REL_WR);
+	} else {
+		card->ext_csd.cmdq_support = 0;
+		card->ext_csd.cmdq_depth = 0;
+		card->ext_csd.barrier_support = 0;
+		card->ext_csd.cache_flush_policy = 0;
+	}
+
 	/* eMMC v5 or later */
 	if (card->ext_csd.rev >= 7) {
 		memcpy(card->ext_csd.fwrev, &ext_csd[EXT_CSD_FIRMWARE_VERSION],
@@ -630,6 +715,7 @@
 
 static int mmc_read_ext_csd(struct mmc_card *card)
 {
+	struct mmc_host *host = card->host;
 	u8 *ext_csd;
 	int err;
 
@@ -638,6 +724,9 @@
 
 	err = mmc_get_ext_csd(card, &ext_csd);
 	if (err) {
+		pr_err("%s: %s: mmc_get_ext_csd() fails %d\n",
+				mmc_hostname(host), __func__, err);
+
 		/* If the host or the card can't do the switch,
 		 * fail more gracefully. */
 		if ((err != -EINVAL)
@@ -661,6 +750,7 @@
 		return err;
 	}
 
+	card->cached_ext_csd = ext_csd;
 	err = mmc_decode_ext_csd(card, ext_csd);
 	kfree(ext_csd);
 	return err;
@@ -762,6 +852,8 @@
 		card->ext_csd.enhanced_area_offset);
 MMC_DEV_ATTR(enhanced_area_size, "%u\n", card->ext_csd.enhanced_area_size);
 MMC_DEV_ATTR(raw_rpmb_size_mult, "%#x\n", card->ext_csd.raw_rpmb_size_mult);
+MMC_DEV_ATTR(enhanced_rpmb_supported, "%#x\n",
+		card->ext_csd.enhanced_rpmb_supported);
 MMC_DEV_ATTR(rel_sectors, "%#x\n", card->ext_csd.rel_sectors);
 MMC_DEV_ATTR(ocr, "%08x\n", card->ocr);
 
@@ -817,6 +909,7 @@
 	&dev_attr_enhanced_area_offset.attr,
 	&dev_attr_enhanced_area_size.attr,
 	&dev_attr_raw_rpmb_size_mult.attr,
+	&dev_attr_enhanced_rpmb_supported.attr,
 	&dev_attr_rel_sectors.attr,
 	&dev_attr_ocr.attr,
 	&dev_attr_dsr.attr,
@@ -953,11 +1046,11 @@
  */
 static int mmc_select_bus_width(struct mmc_card *card)
 {
-	static unsigned ext_csd_bits[] = {
+	static const unsigned ext_csd_bits[] = {
 		EXT_CSD_BUS_WIDTH_8,
 		EXT_CSD_BUS_WIDTH_4,
 	};
-	static unsigned bus_widths[] = {
+	static const unsigned bus_widths[] = {
 		MMC_BUS_WIDTH_8,
 		MMC_BUS_WIDTH_4,
 	};
@@ -1018,12 +1111,12 @@
 }
 
 /* Caller must hold re-tuning */
-static int mmc_switch_status(struct mmc_card *card)
+static int mmc_switch_status(struct mmc_card *card, bool ignore_crc)
 {
 	u32 status;
 	int err;
 
-	err = mmc_send_status(card, &status);
+	err = __mmc_send_status(card, &status, ignore_crc);
 	if (err)
 		return err;
 
@@ -1043,7 +1136,7 @@
 			   true, false, true);
 	if (!err) {
 		mmc_set_timing(card->host, MMC_TIMING_MMC_HS);
-		err = mmc_switch_status(card);
+		err = mmc_switch_status(card, false);
 	}
 
 	if (err)
@@ -1072,10 +1165,11 @@
 	ext_csd_bits = (bus_width == MMC_BUS_WIDTH_8) ?
 		EXT_CSD_DDR_BUS_WIDTH_8 : EXT_CSD_DDR_BUS_WIDTH_4;
 
-	err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+	err = __mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
 			EXT_CSD_BUS_WIDTH,
 			ext_csd_bits,
-			card->ext_csd.generic_cmd6_time);
+			card->ext_csd.generic_cmd6_time,
+			true, false, false);
 	if (err) {
 		pr_err("%s: switch to bus width %d ddr failed\n",
 			mmc_hostname(host), 1 << bus_width);
@@ -1118,8 +1212,10 @@
 	if (err)
 		err = __mmc_set_signal_voltage(host, MMC_SIGNAL_VOLTAGE_330);
 
-	if (!err)
+	if (!err) {
 		mmc_set_timing(host, MMC_TIMING_MMC_DDR52);
+		err = mmc_switch_status(card, false);
+	}
 
 	return err;
 }
@@ -1134,9 +1230,28 @@
 	/*
 	 * HS400 mode requires 8-bit bus width
 	 */
-	if (!(card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS400 &&
-	      host->ios.bus_width == MMC_BUS_WIDTH_8))
-		return 0;
+	if (card->ext_csd.strobe_support) {
+		if (!(card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS400 &&
+		    host->caps & MMC_CAP_8_BIT_DATA))
+			return 0;
+
+		/* For Enhance Strobe flow. For non Enhance Strobe, signal
+		 * voltage will not be set.
+		 */
+		if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200_1_2V)
+			err = __mmc_set_signal_voltage(host,
+					MMC_SIGNAL_VOLTAGE_120);
+
+		if (err && card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200_1_8V)
+			err = __mmc_set_signal_voltage(host,
+					MMC_SIGNAL_VOLTAGE_180);
+		if (err)
+			return err;
+	} else {
+		if (!(card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS400 &&
+		    host->ios.bus_width == MMC_BUS_WIDTH_8))
+			return 0;
+	}
 
 	/* Switch card to HS mode */
 	val = EXT_CSD_TIMING_HS;
@@ -1157,14 +1272,22 @@
 	max_dtr = card->ext_csd.hs_max_dtr;
 	mmc_set_clock(host, max_dtr);
 
-	err = mmc_switch_status(card);
+	err = mmc_switch_status(card, false);
 	if (err)
 		goto out_err;
 
+	val = EXT_CSD_DDR_BUS_WIDTH_8;
+	if (card->ext_csd.strobe_support) {
+		err = mmc_select_bus_width(card);
+		if (IS_ERR_VALUE((unsigned long)err))
+			return err;
+		val |= EXT_CSD_BUS_WIDTH_STROBE;
+	}
+
 	/* Switch card to DDR */
 	err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
 			 EXT_CSD_BUS_WIDTH,
-			 EXT_CSD_DDR_BUS_WIDTH_8,
+			 val,
 			 card->ext_csd.generic_cmd6_time);
 	if (err) {
 		pr_err("%s: switch to bus width for hs400 failed, err:%d\n",
@@ -1189,7 +1312,28 @@
 	mmc_set_timing(host, MMC_TIMING_MMC_HS400);
 	mmc_set_bus_speed(card);
 
-	err = mmc_switch_status(card);
+	if (card->ext_csd.strobe_support && host->ops->enhanced_strobe) {
+		mmc_host_clk_hold(host);
+		err = host->ops->enhanced_strobe(host);
+		mmc_host_clk_release(host);
+	} else if ((host->caps2 & MMC_CAP2_HS400_POST_TUNING) &&
+			host->ops->execute_tuning) {
+		mmc_host_clk_hold(host);
+		err = host->ops->execute_tuning(host,
+				MMC_SEND_TUNING_BLOCK_HS200);
+		mmc_host_clk_release(host);
+
+		if (err)
+			pr_warn("%s: tuning execution failed\n",
+				mmc_hostname(host));
+	}
+
+	/*
+	 * Sending of CMD13 should be done after the host calibration
+	 * for enhanced_strobe or HS400 mode is completed.
+	 * Otherwise may see CMD13 timeouts or CRC errors.
+	 */
+	err = mmc_switch_status(card, false);
 	if (err)
 		goto out_err;
 
@@ -1227,7 +1371,7 @@
 
 	mmc_set_timing(host, MMC_TIMING_MMC_DDR52);
 
-	err = mmc_switch_status(card);
+	err = mmc_switch_status(card, false);
 	if (err)
 		goto out_err;
 
@@ -1240,7 +1384,7 @@
 
 	mmc_set_timing(host, MMC_TIMING_MMC_HS);
 
-	err = mmc_switch_status(card);
+	err = mmc_switch_status(card, false);
 	if (err)
 		goto out_err;
 
@@ -1255,7 +1399,7 @@
 
 	mmc_set_timing(host, MMC_TIMING_MMC_HS200);
 
-	err = mmc_switch_status(card);
+	err = mmc_switch_status(card, false);
 	if (err)
 		goto out_err;
 
@@ -1301,7 +1445,7 @@
 
 	mmc_set_clock(host, card->ext_csd.hs_max_dtr);
 
-	err = mmc_switch_status(card);
+	err = mmc_switch_status(card, false);
 	if (err)
 		goto out_err;
 
@@ -1338,7 +1482,7 @@
 	if (host->ops->hs400_enhanced_strobe)
 		host->ops->hs400_enhanced_strobe(host, &host->ios);
 
-	err = mmc_switch_status(card);
+	err = mmc_switch_status(card, false);
 	if (err)
 		goto out_err;
 
@@ -1411,7 +1555,12 @@
 		old_timing = host->ios.timing;
 		mmc_set_timing(host, MMC_TIMING_MMC_HS200);
 
-		err = mmc_switch_status(card);
+		/*
+		 * Since after switching to hs200, crc errors might
+		 * occur for commands send before tuning.
+		 * So ignore crc error for cmd13.
+		 */
+		err = mmc_switch_status(card, true);
 		/*
 		 * mmc_select_timing() assumes timing has not changed if
 		 * it is a switch error.
@@ -1431,6 +1580,17 @@
 	return err;
 }
 
+static int mmc_reboot_notify(struct notifier_block *notify_block,
+		unsigned long event, void *unused)
+{
+	struct mmc_card *card = container_of(
+			notify_block, struct mmc_card, reboot_notify);
+
+	card->pon_type = (event != SYS_RESTART) ? MMC_LONG_PON : MMC_SHRT_PON;
+
+	return NOTIFY_OK;
+}
+
 /*
  * Activate High Speed, HS200 or HS400ES mode if supported.
  */
@@ -1441,7 +1601,10 @@
 	if (!mmc_can_ext_csd(card))
 		goto bus_speed;
 
-	if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS400ES)
+	/* For Enhance Strobe HS400 flow */
+	if (card->ext_csd.strobe_support &&
+	    card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS400 &&
+	    card->host->caps & MMC_CAP_8_BIT_DATA)
 		err = mmc_select_hs400es(card);
 	else if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200)
 		err = mmc_select_hs200(card);
@@ -1474,12 +1637,242 @@
 	 */
 	if (card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS400 &&
 	    host->ios.bus_width == MMC_BUS_WIDTH_8)
-		if (host->ops->prepare_hs400_tuning)
-			host->ops->prepare_hs400_tuning(host, &host->ios);
+		mmc_set_timing(host, MMC_TIMING_MMC_HS400);
 
 	return mmc_execute_tuning(card);
 }
 
+static int mmc_select_cmdq(struct mmc_card *card)
+{
+	struct mmc_host *host = card->host;
+	int ret = 0;
+
+	if (!host->cmdq_ops) {
+		pr_err("%s: host controller doesn't support CMDQ\n",
+		       mmc_hostname(host));
+		return 0;
+	}
+
+	ret = mmc_set_blocklen(card, MMC_CARD_CMDQ_BLK_SIZE);
+	if (ret)
+		goto out;
+
+	ret = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_CMDQ, 1,
+			 card->ext_csd.generic_cmd6_time);
+	if (ret)
+		goto out;
+
+	mmc_card_set_cmdq(card);
+	mmc_host_clk_hold(card->host);
+	ret = host->cmdq_ops->enable(card->host);
+	if (ret) {
+		mmc_host_clk_release(card->host);
+		pr_err("%s: failed (%d) enabling CMDQ on host\n",
+			mmc_hostname(host), ret);
+		mmc_card_clr_cmdq(card);
+		ret = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_CMDQ, 0,
+				 card->ext_csd.generic_cmd6_time);
+		goto out;
+	}
+
+	mmc_host_clk_release(card->host);
+	pr_info_once("%s: CMDQ enabled on card\n", mmc_hostname(host));
+out:
+	return ret;
+}
+
+static int mmc_select_hs_ddr52(struct mmc_host *host)
+{
+	int err;
+
+	mmc_select_hs(host->card);
+	err = mmc_select_bus_width(host->card);
+	if (err < 0) {
+		pr_err("%s: %s: select_bus_width failed(%d)\n",
+			mmc_hostname(host), __func__, err);
+		return err;
+	}
+
+	err = mmc_select_hs_ddr(host->card);
+	mmc_set_clock(host, MMC_HIGH_52_MAX_DTR);
+
+	return err;
+}
+
+/*
+ * Scale down from HS400 to HS in order to allow frequency change.
+ * This is needed for cards that doesn't support changing frequency in HS400
+ */
+static int mmc_scale_low(struct mmc_host *host, unsigned long freq)
+{
+	int err = 0;
+
+	mmc_set_timing(host, MMC_TIMING_LEGACY);
+	mmc_set_clock(host, MMC_HIGH_26_MAX_DTR);
+
+	if (host->clk_scaling.lower_bus_speed_mode &
+	    MMC_SCALING_LOWER_DDR52_MODE) {
+		err = mmc_select_hs_ddr52(host);
+		if (err)
+			pr_err("%s: %s: failed to switch to DDR52: err: %d\n",
+			       mmc_hostname(host), __func__, err);
+		else
+			return err;
+	}
+
+	err = mmc_select_hs(host->card);
+	if (err) {
+		pr_err("%s: %s: scaling low: failed (%d)\n",
+		       mmc_hostname(host), __func__, err);
+		return err;
+	}
+
+	err = mmc_select_bus_width(host->card);
+	if (err < 0) {
+		pr_err("%s: %s: select_bus_width failed(%d)\n",
+			mmc_hostname(host), __func__, err);
+		return err;
+	}
+
+	mmc_set_clock(host, freq);
+
+	return 0;
+}
+
+/*
+ * Scale UP from HS to HS200/H400
+ */
+static int mmc_scale_high(struct mmc_host *host)
+{
+	int err = 0;
+
+	if (mmc_card_ddr52(host->card)) {
+		mmc_set_timing(host, MMC_TIMING_LEGACY);
+		mmc_set_clock(host, MMC_HIGH_26_MAX_DTR);
+	}
+
+	if (!host->card->ext_csd.strobe_support) {
+		if (!(host->card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS200)) {
+			pr_err("%s: %s: card does not support HS200\n",
+				mmc_hostname(host), __func__);
+			WARN_ON(1);
+			return -EPERM;
+		}
+
+		err = mmc_select_hs200(host->card);
+		if (err) {
+			pr_err("%s: %s: selecting HS200 failed (%d)\n",
+				mmc_hostname(host), __func__, err);
+			return err;
+		}
+
+		mmc_set_bus_speed(host->card);
+
+		err = mmc_hs200_tuning(host->card);
+		if (err) {
+			pr_err("%s: %s: hs200 tuning failed (%d)\n",
+				mmc_hostname(host), __func__, err);
+			return err;
+		}
+
+		if (!(host->card->mmc_avail_type & EXT_CSD_CARD_TYPE_HS400)) {
+			pr_debug("%s: card does not support HS400\n",
+				mmc_hostname(host));
+			return 0;
+		}
+	}
+
+	err = mmc_select_hs400(host->card);
+	if (err) {
+		pr_err("%s: %s: select hs400 failed (%d)\n",
+			mmc_hostname(host), __func__, err);
+		return err;
+	}
+
+	return err;
+}
+
+static int mmc_set_clock_bus_speed(struct mmc_card *card, unsigned long freq)
+{
+	int err = 0;
+
+	if (freq == MMC_HS200_MAX_DTR)
+		err = mmc_scale_high(card->host);
+	else
+		err = mmc_scale_low(card->host, freq);
+
+	return err;
+}
+
+static inline unsigned long mmc_ddr_freq_accommodation(unsigned long freq)
+{
+	if (freq == MMC_HIGH_DDR_MAX_DTR)
+		return freq;
+
+	return freq/2;
+}
+
+/**
+ * mmc_change_bus_speed() - Change MMC card bus frequency at runtime
+ * @host: pointer to mmc host structure
+ * @freq: pointer to desired frequency to be set
+ *
+ * Change the MMC card bus frequency at runtime after the card is
+ * initialized. Callers are expected to make sure of the card's
+ * state (DATA/RCV/TRANSFER) before changing the frequency at runtime.
+ *
+ * If the frequency to change is greater than max. supported by card,
+ * *freq is changed to max. supported by card. If it is less than min.
+ * supported by host, *freq is changed to min. supported by host.
+ * Host is assumed to be calimed while calling this funciton.
+ */
+static int mmc_change_bus_speed(struct mmc_host *host, unsigned long *freq)
+{
+	int err = 0;
+	struct mmc_card *card;
+	unsigned long actual_freq;
+
+	card = host->card;
+
+	if (!card || !freq) {
+		err = -EINVAL;
+		goto out;
+	}
+	actual_freq = *freq;
+
+	WARN_ON(!host->claimed);
+
+	/*
+	 * For scaling up/down HS400 we'll need special handling,
+	 * for other timings we can simply do clock frequency change
+	 */
+	if (mmc_card_hs400(card) ||
+		(!mmc_card_hs200(host->card) && *freq == MMC_HS200_MAX_DTR)) {
+		err = mmc_set_clock_bus_speed(card, *freq);
+		if (err) {
+			pr_err("%s: %s: failed (%d)to set bus and clock speed (freq=%lu)\n",
+				mmc_hostname(host), __func__, err, *freq);
+			goto out;
+		}
+	} else if (mmc_card_hs200(host->card)) {
+		mmc_set_clock(host, *freq);
+		err = mmc_hs200_tuning(host->card);
+		if (err) {
+			pr_warn("%s: %s: tuning execution failed %d\n",
+				mmc_hostname(card->host),
+				__func__, err);
+			mmc_set_clock(host, host->clk_scaling.curr_freq);
+		}
+	} else {
+		if (mmc_card_ddr52(host->card))
+			actual_freq = mmc_ddr_freq_accommodation(*freq);
+		mmc_set_clock(host, actual_freq);
+	}
+
+out:
+	return err;
+}
+
 /*
  * Handle the detection and initialisation of a card.
  *
@@ -1508,20 +1901,27 @@
 	 * respond.
 	 * mmc_go_idle is needed for eMMC that are asleep
 	 */
+reinit:
 	mmc_go_idle(host);
 
 	/* The extra bit indicates that we support high capacity */
 	err = mmc_send_op_cond(host, ocr | (1 << 30), &rocr);
-	if (err)
+	if (err) {
+		pr_err("%s: %s: mmc_send_op_cond() fails %d\n",
+				mmc_hostname(host), __func__, err);
 		goto err;
+	}
 
 	/*
 	 * For SPI, enable CRC as appropriate.
 	 */
 	if (mmc_host_is_spi(host)) {
 		err = mmc_spi_set_crc(host, use_spi_crc);
-		if (err)
+		if (err) {
+			pr_err("%s: %s: mmc_spi_set_crc() fails %d\n",
+					mmc_hostname(host), __func__, err);
 			goto err;
+		}
 	}
 
 	/*
@@ -1531,12 +1931,17 @@
 		err = mmc_send_cid(host, cid);
 	else
 		err = mmc_all_send_cid(host, cid);
-	if (err)
+	if (err) {
+		pr_err("%s: %s: mmc_send_cid() fails %d\n",
+				mmc_hostname(host), __func__, err);
 		goto err;
+	}
 
 	if (oldcard) {
 		if (memcmp(cid, oldcard->raw_cid, sizeof(cid)) != 0) {
 			err = -ENOENT;
+			pr_err("%s: %s: CID memcmp failed %d\n",
+					mmc_hostname(host), __func__, err);
 			goto err;
 		}
 
@@ -1548,6 +1953,8 @@
 		card = mmc_alloc_card(host, &mmc_type);
 		if (IS_ERR(card)) {
 			err = PTR_ERR(card);
+			pr_err("%s: %s: no memory to allocate for card %d\n",
+					mmc_hostname(host), __func__, err);
 			goto err;
 		}
 
@@ -1556,6 +1963,7 @@
 		card->rca = 1;
 		memcpy(card->raw_cid, cid, sizeof(card->raw_cid));
 		host->card = card;
+		card->reboot_notify.notifier_call = mmc_reboot_notify;
 	}
 
 	/*
@@ -1569,8 +1977,11 @@
 	 */
 	if (!mmc_host_is_spi(host)) {
 		err = mmc_set_relative_addr(card);
-		if (err)
+		if (err) {
+			pr_err("%s: %s: mmc_set_relative_addr() fails %d\n",
+					mmc_hostname(host), __func__, err);
 			goto free_card;
+		}
 
 		mmc_set_bus_mode(host, MMC_BUSMODE_PUSHPULL);
 	}
@@ -1580,15 +1991,24 @@
 		 * Fetch CSD from card.
 		 */
 		err = mmc_send_csd(card, card->raw_csd);
-		if (err)
+		if (err) {
+			pr_err("%s: %s: mmc_send_csd() fails %d\n",
+					mmc_hostname(host), __func__, err);
 			goto free_card;
+		}
 
 		err = mmc_decode_csd(card);
-		if (err)
+		if (err) {
+			pr_err("%s: %s: mmc_decode_csd() fails %d\n",
+					mmc_hostname(host), __func__, err);
 			goto free_card;
+		}
 		err = mmc_decode_cid(card);
-		if (err)
+		if (err) {
+			pr_err("%s: %s: mmc_decode_cid() fails %d\n",
+					mmc_hostname(host), __func__, err);
 			goto free_card;
+		}
 	}
 
 	/*
@@ -1603,15 +2023,21 @@
 	 */
 	if (!mmc_host_is_spi(host)) {
 		err = mmc_select_card(card);
-		if (err)
+		if (err) {
+			pr_err("%s: %s: mmc_select_card() fails %d\n",
+					mmc_hostname(host), __func__, err);
 			goto free_card;
+		}
 	}
 
 	if (!oldcard) {
 		/* Read extended CSD. */
 		err = mmc_read_ext_csd(card);
-		if (err)
+		if (err) {
+			pr_err("%s: %s: mmc_read_ext_csd() fails %d\n",
+					mmc_hostname(host), __func__, err);
 			goto free_card;
+		}
 
 		/*
 		 * If doing byte addressing, check if required to do sector
@@ -1624,6 +2050,9 @@
 
 		/* Erase size depends on CSD and Extended CSD */
 		mmc_set_erase_size(card);
+
+		if (card->ext_csd.sectors && (rocr & MMC_CARD_SECTOR_ADDR))
+			mmc_card_set_blockaddr(card);
 	}
 
 	/*
@@ -1636,8 +2065,11 @@
 				 EXT_CSD_ERASE_GROUP_DEF, 1,
 				 card->ext_csd.generic_cmd6_time);
 
-		if (err && err != -EBADMSG)
+		if (err && err != -EBADMSG) {
+			pr_err("%s: %s: mmc_switch() for ERASE_GRP_DEF fails %d\n",
+					mmc_hostname(host), __func__, err);
 			goto free_card;
+		}
 
 		if (err) {
 			err = 0;
@@ -1667,8 +2099,13 @@
 		err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL, EXT_CSD_PART_CONFIG,
 				 card->ext_csd.part_config,
 				 card->ext_csd.part_time);
-		if (err && err != -EBADMSG)
+		if (err && err != -EBADMSG) {
+			pr_err("%s: %s: mmc_switch() for PART_CONFIG fails %d\n",
+					mmc_hostname(host), __func__, err);
 			goto free_card;
+		}
+		card->part_curr = card->ext_csd.part_config &
+				  EXT_CSD_PART_CONFIG_ACC_MASK;
 	}
 
 	/*
@@ -1679,8 +2116,11 @@
 				 EXT_CSD_POWER_OFF_NOTIFICATION,
 				 EXT_CSD_POWER_ON,
 				 card->ext_csd.generic_cmd6_time);
-		if (err && err != -EBADMSG)
+		if (err && err != -EBADMSG) {
+			pr_err("%s: %s: mmc_switch() for POWER_ON PON fails %d\n",
+					mmc_hostname(host), __func__, err);
 			goto free_card;
+		}
 
 		/*
 		 * The err can be -EBADMSG or 0,
@@ -1694,8 +2134,11 @@
 	 * Select timing interface
 	 */
 	err = mmc_select_timing(card);
-	if (err)
+	if (err) {
+		pr_err("%s: %s: mmc_select_timing() fails %d\n",
+					mmc_hostname(host), __func__, err);
 		goto free_card;
+	}
 
 	if (mmc_card_hs200(card)) {
 		err = mmc_hs200_tuning(card);
@@ -1715,6 +2158,16 @@
 		}
 	}
 
+	card->clk_scaling_lowest = host->f_min;
+	if ((card->mmc_avail_type | EXT_CSD_CARD_TYPE_HS400) ||
+			(card->mmc_avail_type | EXT_CSD_CARD_TYPE_HS200))
+		card->clk_scaling_highest = card->ext_csd.hs200_max_dtr;
+	else if ((card->mmc_avail_type | EXT_CSD_CARD_TYPE_HS) ||
+			(card->mmc_avail_type | EXT_CSD_CARD_TYPE_DDR_52))
+		card->clk_scaling_highest = card->ext_csd.hs_max_dtr;
+	else
+		card->clk_scaling_highest = card->csd.max_dtr;
+
 	/*
 	 * Choose the power class with selected bus interface
 	 */
@@ -1727,8 +2180,11 @@
 		err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
 				EXT_CSD_HPI_MGMT, 1,
 				card->ext_csd.generic_cmd6_time);
-		if (err && err != -EBADMSG)
+		if (err && err != -EBADMSG) {
+			pr_err("%s: %s: mmc_switch() for HPI_MGMT fails %d\n",
+					mmc_hostname(host), __func__, err);
 			goto free_card;
+		}
 		if (err) {
 			pr_warn("%s: Enabling HPI failed\n",
 				mmc_hostname(card->host));
@@ -1740,28 +2196,72 @@
 	/*
 	 * If cache size is higher than 0, this indicates
 	 * the existence of cache and it can be turned on.
+	 * If HPI is not supported then cache shouldn't be enabled.
 	 */
 	if (!mmc_card_broken_hpi(card) &&
 	    card->ext_csd.cache_size > 0) {
-		err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
-				EXT_CSD_CACHE_CTRL, 1,
-				card->ext_csd.generic_cmd6_time);
-		if (err && err != -EBADMSG)
-			goto free_card;
+		if (card->ext_csd.hpi_en &&
+			(!(card->quirks & MMC_QUIRK_CACHE_DISABLE))) {
+			err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+					EXT_CSD_CACHE_CTRL, 1,
+					card->ext_csd.generic_cmd6_time);
+			if (err && err != -EBADMSG) {
+				pr_err("%s: %s: fail on CACHE_CTRL ON %d\n",
+					mmc_hostname(host), __func__, err);
+				goto free_card;
+			}
 
-		/*
-		 * Only if no error, cache is turned on successfully.
-		 */
-		if (err) {
-			pr_warn("%s: Cache is supported, but failed to turn on (%d)\n",
-				mmc_hostname(card->host), err);
-			card->ext_csd.cache_ctrl = 0;
-			err = 0;
+			/*
+			 * Only if no error, cache is turned on successfully.
+			 */
+			if (err) {
+				pr_warn("%s: Cache is supported, but failed to turn on (%d)\n",
+					mmc_hostname(card->host), err);
+				card->ext_csd.cache_ctrl = 0;
+				err = 0;
+			} else {
+				card->ext_csd.cache_ctrl = 1;
+			}
+			/* enable cache barrier if supported by the device */
+			if (card->ext_csd.cache_ctrl &&
+					card->ext_csd.barrier_support) {
+				err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+					EXT_CSD_BARRIER_CTRL, 1,
+					card->ext_csd.generic_cmd6_time);
+				if (err && err != -EBADMSG) {
+					pr_err("%s: %s: mmc_switch() for BARRIER_CTRL fails %d\n",
+						mmc_hostname(host), __func__,
+						err);
+					goto free_card;
+				}
+				if (err) {
+					pr_warn("%s: Barrier is supported but failed to turn on (%d)\n",
+						mmc_hostname(card->host), err);
+					card->ext_csd.barrier_en = 0;
+					err = 0;
+				} else {
+					card->ext_csd.barrier_en = 1;
+				}
+			}
 		} else {
-			card->ext_csd.cache_ctrl = 1;
+			/*
+			 * mmc standard doesn't say what is the card default
+			 * value for EXT_CSD_CACHE_CTRL.
+			 * Hence, cache may be enabled by default by
+			 * card vendors.
+			 * Thus, it is best to explicitly disable cache in case
+			 * we want to avoid cache.
+			 */
+			err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+					EXT_CSD_CACHE_CTRL, 0,
+					card->ext_csd.generic_cmd6_time);
+			if (err) {
+				pr_err("%s: %s: fail on CACHE_CTRL OFF %d\n",
+					mmc_hostname(host), __func__, err);
+				goto free_card;
+			}
 		}
 	}
-
 	/*
 	 * The mandatory minimum values are defined for packed command.
 	 * read: 5, write: 3
@@ -1773,8 +2273,11 @@
 				EXT_CSD_EXP_EVENTS_CTRL,
 				EXT_CSD_PACKED_EVENT_EN,
 				card->ext_csd.generic_cmd6_time);
-		if (err && err != -EBADMSG)
+		if (err && err != -EBADMSG) {
+			pr_err("%s: %s: mmc_switch() for EXP_EVENTS_CTRL fails %d\n",
+					mmc_hostname(host), __func__, err);
 			goto free_card;
+		}
 		if (err) {
 			pr_warn("%s: Enabling packed event failed\n",
 				mmc_hostname(card->host));
@@ -1783,40 +2286,125 @@
 		} else {
 			card->ext_csd.packed_event_en = 1;
 		}
+
+	}
+
+	if (!oldcard) {
+		if ((host->caps2 & MMC_CAP2_PACKED_CMD) &&
+		    (card->ext_csd.max_packed_writes > 0)) {
+			/*
+			 * We would like to keep the statistics in an index
+			 * that equals the num of packed requests
+			 * (1 to max_packed_writes)
+			 */
+			card->wr_pack_stats.packing_events = kzalloc(
+				(card->ext_csd.max_packed_writes + 1) *
+				sizeof(*card->wr_pack_stats.packing_events),
+				GFP_KERNEL);
+			if (!card->wr_pack_stats.packing_events) {
+				pr_err("%s: %s: no memory for packing events\n",
+						mmc_hostname(host), __func__);
+				goto free_card;
+			}
+		}
+	}
+
+	/*
+	 * Start auto bkops, if supported.
+	 *
+	 * Note: This leaves the possibility of having both manual and
+	 * auto bkops running in parallel. The runtime implementation
+	 * will allow this, but ignore bkops exceptions on the premises
+	 * that auto bkops will eventually kick in and the device will
+	 * handle bkops without START_BKOPS from the host.
+	 */
+	if (mmc_card_support_auto_bkops(card)) {
+		/*
+		 * Ignore the return value of setting auto bkops.
+		 * If it failed, will run in backward compatible mode.
+		 */
+		(void)mmc_set_auto_bkops(card, true);
+	}
+
+	if (card->ext_csd.cmdq_support && (card->host->caps2 &
+					   MMC_CAP2_CMD_QUEUE)) {
+		err = mmc_select_cmdq(card);
+		if (err) {
+			pr_err("%s: selecting CMDQ mode: failed: %d\n",
+					   mmc_hostname(card->host), err);
+			card->ext_csd.cmdq_support = 0;
+			oldcard = card;
+			goto reinit;
+		}
 	}
 
 	return 0;
 
 free_card:
-	host->card = NULL;
-	if (!oldcard)
+	if (!oldcard) {
+		host->card = NULL;
 		mmc_remove_card(card);
+	}
 err:
 	return err;
 }
 
-static int mmc_can_sleep(struct mmc_card *card)
+static int mmc_can_sleepawake(struct mmc_host *host)
 {
-	return (card && card->ext_csd.rev >= 3);
+	return host && (host->caps2 & MMC_CAP2_SLEEP_AWAKE) && host->card &&
+		(host->card->ext_csd.rev >= 3);
 }
 
-static int mmc_sleep(struct mmc_host *host)
+static int mmc_sleepawake(struct mmc_host *host, bool sleep)
 {
 	struct mmc_command cmd = {0};
 	struct mmc_card *card = host->card;
-	unsigned int timeout_ms = DIV_ROUND_UP(card->ext_csd.sa_timeout, 10000);
+	unsigned int timeout_ms;
 	int err;
 
+	if (!card) {
+		pr_err("%s: %s: invalid card\n", mmc_hostname(host), __func__);
+		return -EINVAL;
+	}
+
+	timeout_ms = DIV_ROUND_UP(card->ext_csd.sa_timeout, 10000);
+	if (card->ext_csd.rev >= 3 &&
+		card->part_curr == EXT_CSD_PART_CONFIG_ACC_RPMB) {
+		u8 part_config = card->ext_csd.part_config;
+
+		/*
+		 * If the last access before suspend is RPMB access, then
+		 * switch to default part config so that sleep command CMD5
+		 * and deselect CMD7 can be sent to the card.
+		 */
+		part_config &= ~EXT_CSD_PART_CONFIG_ACC_MASK;
+		err = mmc_switch(card, EXT_CSD_CMD_SET_NORMAL,
+				 EXT_CSD_PART_CONFIG,
+				 part_config,
+				 card->ext_csd.part_time);
+		if (err) {
+			pr_err("%s: %s: failed to switch to default part config %x\n",
+				mmc_hostname(host), __func__, part_config);
+			return err;
+		}
+		card->ext_csd.part_config = part_config;
+		card->part_curr = card->ext_csd.part_config &
+				  EXT_CSD_PART_CONFIG_ACC_MASK;
+	}
+
 	/* Re-tuning can't be done once the card is deselected */
 	mmc_retune_hold(host);
 
-	err = mmc_deselect_cards(host);
-	if (err)
-		goto out_release;
+	if (sleep) {
+		err = mmc_deselect_cards(host);
+		if (err)
+			goto out_release;
+	}
 
 	cmd.opcode = MMC_SLEEP_AWAKE;
 	cmd.arg = card->rca << 16;
-	cmd.arg |= 1 << 15;
+	if (sleep)
+		cmd.arg |= 1 << 15;
 
 	/*
 	 * If the max_busy_timeout of the host is specified, validate it against
@@ -1844,6 +2432,9 @@
 	if (!cmd.busy_timeout || !(host->caps & MMC_CAP_WAIT_WHILE_BUSY))
 		mmc_delay(timeout_ms);
 
+	if (!sleep)
+		err = mmc_select_card(card);
+
 out_release:
 	mmc_retune_release(host);
 	return err;
@@ -1878,6 +2469,27 @@
 	return err;
 }
 
+int mmc_send_pon(struct mmc_card *card)
+{
+	int err = 0;
+	struct mmc_host *host = card->host;
+
+	if (!mmc_can_poweroff_notify(card))
+		goto out;
+
+	mmc_get_card(card);
+	if (card->pon_type & MMC_LONG_PON)
+		err = mmc_poweroff_notify(host->card, EXT_CSD_POWER_OFF_LONG);
+	else if (card->pon_type & MMC_SHRT_PON)
+		err = mmc_poweroff_notify(host->card, EXT_CSD_POWER_OFF_SHORT);
+	if (err)
+		pr_warn("%s: error %d sending PON type %u",
+			mmc_hostname(host), err, card->pon_type);
+	mmc_put_card(card);
+out:
+	return err;
+}
+
 /*
  * Host is being removed. Free up the current card.
  */
@@ -1886,8 +2498,14 @@
 	BUG_ON(!host);
 	BUG_ON(!host->card);
 
+	unregister_reboot_notifier(&host->card->reboot_notify);
+
+	mmc_exit_clk_scaling(host);
 	mmc_remove_card(host->card);
+
+	mmc_claim_host(host);
 	host->card = NULL;
+	mmc_release_host(host);
 }
 
 /*
@@ -1927,20 +2545,101 @@
 	}
 }
 
+static int mmc_cache_card_ext_csd(struct mmc_host *host)
+{
+	int err;
+	u8 *ext_csd;
+	struct mmc_card *card = host->card;
+
+	err = mmc_get_ext_csd(card, &ext_csd);
+	if (err || !ext_csd) {
+		pr_err("%s: %s: mmc_get_ext_csd failed (%d)\n",
+			mmc_hostname(host), __func__, err);
+		return err;
+	}
+
+	/* only cache read/write fields that the sw changes */
+	card->ext_csd.raw_ext_csd_cmdq = ext_csd[EXT_CSD_CMDQ];
+	card->ext_csd.raw_ext_csd_cache_ctrl = ext_csd[EXT_CSD_CACHE_CTRL];
+	card->ext_csd.raw_ext_csd_bus_width = ext_csd[EXT_CSD_BUS_WIDTH];
+	card->ext_csd.raw_ext_csd_hs_timing = ext_csd[EXT_CSD_HS_TIMING];
+
+	kfree(ext_csd);
+
+	return 0;
+}
+
+static int mmc_test_awake_ext_csd(struct mmc_host *host)
+{
+	int err;
+	u8 *ext_csd;
+	struct mmc_card *card = host->card;
+
+	err = mmc_get_ext_csd(card, &ext_csd);
+	if (err || !ext_csd) {
+		pr_err("%s: %s: mmc_get_ext_csd failed (%d)\n",
+			mmc_hostname(host), __func__, err);
+		return err;
+	}
+
+	/* only compare read/write fields that the sw changes */
+	pr_debug("%s: %s: type(cached:current) cmdq(%d:%d) cache_ctrl(%d:%d) bus_width (%d:%d) timing(%d:%d)\n",
+		mmc_hostname(host), __func__,
+		card->ext_csd.raw_ext_csd_cmdq,
+		ext_csd[EXT_CSD_CMDQ],
+		card->ext_csd.raw_ext_csd_cache_ctrl,
+		ext_csd[EXT_CSD_CACHE_CTRL],
+		card->ext_csd.raw_ext_csd_bus_width,
+		ext_csd[EXT_CSD_BUS_WIDTH],
+		card->ext_csd.raw_ext_csd_hs_timing,
+		ext_csd[EXT_CSD_HS_TIMING]);
+
+	err = !((card->ext_csd.raw_ext_csd_cmdq ==
+			ext_csd[EXT_CSD_CMDQ]) &&
+		(card->ext_csd.raw_ext_csd_cache_ctrl ==
+			ext_csd[EXT_CSD_CACHE_CTRL]) &&
+		(card->ext_csd.raw_ext_csd_bus_width ==
+			ext_csd[EXT_CSD_BUS_WIDTH]) &&
+		(card->ext_csd.raw_ext_csd_hs_timing ==
+			ext_csd[EXT_CSD_HS_TIMING]));
+
+	kfree(ext_csd);
+
+	return err;
+}
+
 static int _mmc_suspend(struct mmc_host *host, bool is_suspend)
 {
 	int err = 0;
-	unsigned int notify_type = is_suspend ? EXT_CSD_POWER_OFF_SHORT :
-					EXT_CSD_POWER_OFF_LONG;
 
 	BUG_ON(!host);
 	BUG_ON(!host->card);
 
+	err = mmc_suspend_clk_scaling(host);
+	if (err) {
+		pr_err("%s: %s: fail to suspend clock scaling (%d)\n",
+			mmc_hostname(host), __func__, err);
+		return err;
+	}
+
 	mmc_claim_host(host);
 
 	if (mmc_card_suspended(host->card))
 		goto out;
 
+	if (host->card->cmdq_init) {
+		BUG_ON(host->cmdq_ctx.active_reqs);
+
+		err = mmc_cmdq_halt(host, true);
+		if (err) {
+			pr_err("%s: halt: failed: %d\n", __func__, err);
+			goto out;
+		}
+		mmc_host_clk_hold(host);
+		host->cmdq_ops->disable(host, true);
+		mmc_host_clk_release(host);
+	}
+
 	if (mmc_card_doing_bkops(host->card)) {
 		err = mmc_stop_bkops(host->card);
 		if (err)
@@ -1951,36 +2650,117 @@
 	if (err)
 		goto out;
 
-	if (mmc_can_poweroff_notify(host->card) &&
-		((host->caps2 & MMC_CAP2_FULL_PWR_CYCLE) || !is_suspend))
-		err = mmc_poweroff_notify(host->card, notify_type);
-	else if (mmc_can_sleep(host->card))
-		err = mmc_sleep(host);
-	else if (!mmc_host_is_spi(host))
+	if (mmc_can_sleepawake(host)) {
+		/*
+		 * For caching host->ios to cached_ios we need to
+		 * make sure that clocks are not gated otherwise
+		 * cached_ios->clock will be 0.
+		 */
+		mmc_host_clk_hold(host);
+		memcpy(&host->cached_ios, &host->ios, sizeof(host->cached_ios));
+		mmc_cache_card_ext_csd(host);
+		err = mmc_sleepawake(host, true);
+		mmc_host_clk_release(host);
+	} else if (!mmc_host_is_spi(host)) {
 		err = mmc_deselect_cards(host);
+	}
 
 	if (!err) {
 		mmc_power_off(host);
 		mmc_card_set_suspended(host->card);
 	}
 out:
+	/* Kick CMDQ thread to process any requests came in while suspending */
+	if (host->card->cmdq_init)
+		wake_up(&host->cmdq_ctx.wait);
+
 	mmc_release_host(host);
 	return err;
 }
 
+static int mmc_partial_init(struct mmc_host *host)
+{
+	int err = 0;
+	struct mmc_card *card = host->card;
+
+	pr_debug("%s: %s: starting partial init\n",
+		mmc_hostname(host), __func__);
+
+	mmc_set_bus_width(host, host->cached_ios.bus_width);
+	mmc_set_timing(host, host->cached_ios.timing);
+	mmc_set_clock(host, host->cached_ios.clock);
+	mmc_set_bus_mode(host, host->cached_ios.bus_mode);
+
+	mmc_host_clk_hold(host);
+
+	if (mmc_card_hs400(card)) {
+		if (card->ext_csd.strobe_support && host->ops->enhanced_strobe)
+			err = host->ops->enhanced_strobe(host);
+	} else if (mmc_card_hs200(card) && host->ops->execute_tuning) {
+		err = host->ops->execute_tuning(host,
+			MMC_SEND_TUNING_BLOCK_HS200);
+		if (err)
+			pr_warn("%s: %s: tuning execution failed (%d)\n",
+				mmc_hostname(host), __func__, err);
+	}
+
+	/*
+	 * The ext_csd is read to make sure the card did not went through
+	 * Power-failure during sleep period.
+	 * A subset of the W/E_P, W/C_P register will be tested. In case
+	 * these registers values are different from the values that were
+	 * cached during suspend, we will conclude that a Power-failure occurred
+	 * and will do full initialization sequence.
+	 * In addition, full init sequence also transfer ext_csd before moving
+	 * to CMDQ mode which has a side affect of configuring SDHCI registers
+	 * which needed to be done before moving to CMDQ mode. The same
+	 * registers need to be configured for partial init.
+	 */
+	err = mmc_test_awake_ext_csd(host);
+	if (err) {
+		pr_debug("%s: %s: fail on ext_csd read (%d)\n",
+			mmc_hostname(host), __func__, err);
+		goto out;
+	}
+	pr_debug("%s: %s: reading and comparing ext_csd successful\n",
+		mmc_hostname(host), __func__);
+
+	if (card->ext_csd.cmdq_support && (card->host->caps2 &
+					   MMC_CAP2_CMD_QUEUE)) {
+		err = mmc_select_cmdq(card);
+		if (err) {
+			pr_warn("%s: %s: enabling CMDQ mode failed (%d)\n",
+					mmc_hostname(card->host),
+					__func__, err);
+		}
+	}
+out:
+	mmc_host_clk_release(host);
+
+	pr_debug("%s: %s: done partial init (%d)\n",
+		mmc_hostname(host), __func__, err);
+
+	return err;
+}
+
 /*
  * Suspend callback
  */
 static int mmc_suspend(struct mmc_host *host)
 {
 	int err;
+	ktime_t start = ktime_get();
 
+	MMC_TRACE(host, "%s: Enter\n", __func__);
 	err = _mmc_suspend(host, true);
 	if (!err) {
 		pm_runtime_disable(&host->card->dev);
 		pm_runtime_set_suspended(&host->card->dev);
 	}
 
+	trace_mmc_suspend(mmc_hostname(host), err,
+			ktime_to_us(ktime_sub(ktime_get(), start)));
+	MMC_TRACE(host, "%s: Exit err: %d\n", __func__, err);
 	return err;
 }
 
@@ -1990,43 +2770,61 @@
  */
 static int _mmc_resume(struct mmc_host *host)
 {
-	int err = 0;
+	int err = -ENOSYS;
+	int retries;
 
 	BUG_ON(!host);
 	BUG_ON(!host->card);
 
 	mmc_claim_host(host);
 
-	if (!mmc_card_suspended(host->card))
+	if (!mmc_card_suspended(host->card)) {
+		mmc_release_host(host);
 		goto out;
+	}
 
 	mmc_power_up(host, host->card->ocr);
-	err = mmc_init_card(host, host->card->ocr, host->card);
+	retries = 3;
+	while (retries) {
+		if (mmc_can_sleepawake(host)) {
+			err = mmc_sleepawake(host, false);
+			if (!err)
+				err = mmc_partial_init(host);
+			if (err)
+				pr_err("%s: %s: awake failed (%d), fallback to full init\n",
+					mmc_hostname(host), __func__, err);
+		}
+
+		if (err)
+			err = mmc_init_card(host, host->card->ocr, host->card);
+
+		if (err) {
+			pr_err("%s: MMC card re-init failed rc = %d (retries = %d)\n",
+			       mmc_hostname(host), err, retries);
+			retries--;
+			mmc_power_off(host);
+			usleep_range(5000, 5500);
+			mmc_power_up(host, host->card->ocr);
+			mmc_select_voltage(host, host->card->ocr);
+			continue;
+		}
+		break;
+	}
+	if (!err && mmc_card_cmdq(host->card)) {
+		err = mmc_cmdq_halt(host, false);
+		if (err)
+			pr_err("%s: un-halt: failed: %d\n", __func__, err);
+	}
 	mmc_card_clr_suspended(host->card);
 
-out:
 	mmc_release_host(host);
-	return err;
-}
 
-/*
- * Shutdown callback
- */
-static int mmc_shutdown(struct mmc_host *host)
-{
-	int err = 0;
+	err = mmc_resume_clk_scaling(host);
+	if (err)
+		pr_err("%s: %s: fail to resume clock scaling (%d)\n",
+			mmc_hostname(host), __func__, err);
 
-	/*
-	 * In a specific case for poweroff notify, we need to resume the card
-	 * before we can shutdown it properly.
-	 */
-	if (mmc_can_poweroff_notify(host->card) &&
-		!(host->caps2 & MMC_CAP2_FULL_PWR_CYCLE))
-		err = _mmc_resume(host);
-
-	if (!err)
-		err = _mmc_suspend(host, false);
-
+out:
 	return err;
 }
 
@@ -2035,25 +2833,97 @@
  */
 static int mmc_resume(struct mmc_host *host)
 {
+	int err = 0;
+
+	MMC_TRACE(host, "%s: Enter\n", __func__);
 	pm_runtime_enable(&host->card->dev);
+
+	MMC_TRACE(host, "%s: Exit err: %d\n", __func__, err);
 	return 0;
 }
 
+#define MAX_DEFER_SUSPEND_COUNTER 20
+static bool mmc_process_bkops(struct mmc_host *host)
+{
+	int err = 0;
+	bool is_running = false;
+	u32 status;
+
+	mmc_claim_host(host);
+	if (mmc_card_cmdq(host->card)) {
+		BUG_ON(host->cmdq_ctx.active_reqs);
+
+		err = mmc_cmdq_halt(host, true);
+		if (err) {
+			pr_err("%s: halt: failed: %d\n", __func__, err);
+			goto unhalt;
+		}
+	}
+
+	if (mmc_card_doing_bkops(host->card)) {
+		/* check that manual bkops finished */
+		err = mmc_send_status(host->card, &status);
+		if (err) {
+			pr_err("%s: Get card status fail\n", __func__);
+			goto unhalt;
+		}
+		if (R1_CURRENT_STATE(status) != R1_STATE_PRG) {
+			mmc_card_clr_doing_bkops(host->card);
+			goto unhalt;
+		}
+	} else {
+		mmc_check_bkops(host->card);
+	}
+
+	if (host->card->bkops.needs_bkops &&
+			!mmc_card_support_auto_bkops(host->card))
+		mmc_start_manual_bkops(host->card);
+
+unhalt:
+	if (mmc_card_cmdq(host->card)) {
+		err = mmc_cmdq_halt(host, false);
+		if (err)
+			pr_err("%s: unhalt: failed: %d\n", __func__, err);
+	}
+	mmc_release_host(host);
+
+	if (host->card->bkops.needs_bkops ||
+			mmc_card_doing_bkops(host->card)) {
+		if (host->card->bkops.retry_counter++ <
+				MAX_DEFER_SUSPEND_COUNTER) {
+			host->card->bkops.needs_check = true;
+			is_running = true;
+		} else {
+			host->card->bkops.retry_counter = 0;
+		}
+	}
+	return is_running;
+}
+
 /*
  * Callback for runtime_suspend.
  */
 static int mmc_runtime_suspend(struct mmc_host *host)
 {
 	int err;
+	ktime_t start = ktime_get();
 
 	if (!(host->caps & MMC_CAP_AGGRESSIVE_PM))
 		return 0;
 
+	if (mmc_process_bkops(host)) {
+		pm_runtime_mark_last_busy(&host->card->dev);
+		pr_debug("%s: defered, need bkops\n", __func__);
+		return -EBUSY;
+	}
+
 	err = _mmc_suspend(host, true);
 	if (err)
 		pr_err("%s: error %d doing aggressive suspend\n",
 			mmc_hostname(host), err);
 
+	trace_mmc_runtime_suspend(mmc_hostname(host), err,
+			ktime_to_us(ktime_sub(ktime_get(), start)));
 	return err;
 }
 
@@ -2063,13 +2933,17 @@
 static int mmc_runtime_resume(struct mmc_host *host)
 {
 	int err;
+	ktime_t start = ktime_get();
 
 	err = _mmc_resume(host);
 	if (err && err != -ENOMEDIUM)
 		pr_err("%s: error %d doing runtime resume\n",
 			mmc_hostname(host), err);
 
-	return 0;
+	trace_mmc_runtime_resume(mmc_hostname(host), err,
+			ktime_to_us(ktime_sub(ktime_get(), start)));
+
+	return err;
 }
 
 int mmc_can_reset(struct mmc_card *card)
@@ -2117,7 +2991,7 @@
 	.runtime_suspend = mmc_runtime_suspend,
 	.runtime_resume = mmc_runtime_resume,
 	.alive = mmc_alive,
-	.shutdown = mmc_shutdown,
+	.change_bus_speed = mmc_change_bus_speed,
 	.reset = mmc_reset,
 };
 
@@ -2176,6 +3050,14 @@
 		goto remove_card;
 
 	mmc_claim_host(host);
+	err = mmc_init_clk_scaling(host);
+	if (err) {
+		mmc_release_host(host);
+		goto remove_card;
+	}
+
+	register_reboot_notifier(&host->card->reboot_notify);
+
 	return 0;
 
 remove_card:
diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c
index ad6e979..16f7c58 100644
--- a/drivers/mmc/core/mmc_ops.c
+++ b/drivers/mmc/core/mmc_ops.c
@@ -54,7 +54,7 @@
 	0xff, 0x77, 0x77, 0xff, 0x77, 0xbb, 0xdd, 0xee,
 };
 
-static inline int __mmc_send_status(struct mmc_card *card, u32 *status,
+int __mmc_send_status(struct mmc_card *card, u32 *status,
 				    bool ignore_crc)
 {
 	int err;
@@ -456,6 +456,45 @@
 }
 
 /**
+ *	mmc_prepare_switch - helper; prepare to modify EXT_CSD register
+ *	@card: the MMC card associated with the data transfer
+ *	@set: cmd set values
+ *	@index: EXT_CSD register index
+ *	@value: value to program into EXT_CSD register
+ *	@tout_ms: timeout (ms) for operation performed by register write,
+ *                   timeout of zero implies maximum possible timeout
+ *	@use_busy_signal: use the busy signal as response type
+ *
+ *	Helper to prepare to modify EXT_CSD register for selected card.
+ */
+
+static inline void mmc_prepare_switch(struct mmc_command *cmd, u8 index,
+				      u8 value, u8 set, unsigned int tout_ms,
+				      bool use_busy_signal)
+{
+	cmd->opcode = MMC_SWITCH;
+	cmd->arg = (MMC_SWITCH_MODE_WRITE_BYTE << 24) |
+		  (index << 16) |
+		  (value << 8) |
+		  set;
+	cmd->flags = MMC_CMD_AC;
+	cmd->busy_timeout = tout_ms;
+	if (use_busy_signal)
+		cmd->flags |= MMC_RSP_SPI_R1B | MMC_RSP_R1B;
+	else
+		cmd->flags |= MMC_RSP_SPI_R1 | MMC_RSP_R1;
+}
+
+int __mmc_switch_cmdq_mode(struct mmc_command *cmd, u8 set, u8 index, u8 value,
+			   unsigned int timeout_ms, bool use_busy_signal,
+			   bool ignore_timeout)
+{
+	mmc_prepare_switch(cmd, index, value, set, timeout_ms, use_busy_signal);
+	return 0;
+}
+EXPORT_SYMBOL(__mmc_switch_cmdq_mode);
+
+/**
  *	__mmc_switch - modify EXT_CSD register
  *	@card: the MMC card associated with the data transfer
  *	@set: cmd set values
@@ -479,6 +518,7 @@
 	unsigned long timeout;
 	u32 status = 0;
 	bool use_r1b_resp = use_busy_signal;
+	int retries = 5;
 	bool expired = false;
 	bool busy = false;
 
@@ -494,12 +534,8 @@
 		(timeout_ms > host->max_busy_timeout))
 		use_r1b_resp = false;
 
-	cmd.opcode = MMC_SWITCH;
-	cmd.arg = (MMC_SWITCH_MODE_WRITE_BYTE << 24) |
-		  (index << 16) |
-		  (value << 8) |
-		  set;
-	cmd.flags = MMC_CMD_AC;
+	mmc_prepare_switch(&cmd, index, value, set, timeout_ms,
+			   use_r1b_resp);
 	if (use_r1b_resp) {
 		cmd.flags |= MMC_RSP_SPI_R1B | MMC_RSP_R1B;
 		/*
@@ -513,6 +549,8 @@
 
 	if (index == EXT_CSD_SANITIZE_START)
 		cmd.sanitize_busy = true;
+	else if (index == EXT_CSD_BKOPS_START)
+		cmd.bkops_busy = true;
 
 	err = mmc_wait_for_cmd(host, &cmd, MMC_CMD_RETRIES);
 	if (err)
@@ -570,10 +608,17 @@
 		/* Timeout if the device never leaves the program state. */
 		if (expired &&
 		    (R1_CURRENT_STATE(status) == R1_STATE_PRG || busy)) {
-			pr_err("%s: Card stuck in programming state! %s\n",
-				mmc_hostname(host), __func__);
-			err = -ETIMEDOUT;
-			goto out;
+			pr_err("%s: Card stuck in programming state! %s, timeout:%ums, retries:%d\n",
+				mmc_hostname(host), __func__,
+				timeout_ms, retries);
+			if (retries)
+				timeout = jiffies +
+						msecs_to_jiffies(timeout_ms);
+			else {
+				err = -ETIMEDOUT;
+				goto out;
+			}
+			retries--;
 		}
 	} while (R1_CURRENT_STATE(status) == R1_STATE_PRG || busy);
 
@@ -717,7 +762,10 @@
 
 	data.sg = &sg;
 	data.sg_len = 1;
+	data.timeout_ns = 1000000;
+	data.timeout_clks = 0;
 	mmc_set_data_timeout(&data, card);
+
 	sg_init_one(&sg, data_buf, len);
 	mmc_wait_for_req(host, &mrq);
 	err = 0;
@@ -765,7 +813,7 @@
 	unsigned int opcode;
 	int err;
 
-	if (!card->ext_csd.hpi) {
+	if (!card->ext_csd.hpi_en) {
 		pr_warn("%s: Card didn't support HPI command\n",
 			mmc_hostname(card->host));
 		return -EINVAL;
@@ -782,7 +830,7 @@
 
 	err = mmc_wait_for_cmd(card->host, &cmd, 0);
 	if (err) {
-		pr_warn("%s: error %d interrupting operation. "
+		pr_debug("%s: error %d interrupting operation. "
 			"HPI command response %#x\n", mmc_hostname(card->host),
 			err, cmd.resp[0]);
 		return err;
@@ -797,3 +845,21 @@
 {
 	return (card && card->csd.mmca_vsn > CSD_SPEC_VER_3);
 }
+
+int mmc_discard_queue(struct mmc_host *host, u32 tasks)
+{
+	struct mmc_command cmd = {0};
+
+	cmd.opcode = MMC_CMDQ_TASK_MGMT;
+	if (tasks) {
+		cmd.arg = DISCARD_TASK;
+		cmd.arg |= (tasks << 16);
+	} else {
+		cmd.arg = DISCARD_QUEUE;
+	}
+
+	cmd.flags = MMC_RSP_R1B | MMC_CMD_AC;
+
+	return mmc_wait_for_cmd(host, &cmd, 0);
+}
+EXPORT_SYMBOL(mmc_discard_queue);
diff --git a/drivers/mmc/core/mmc_ops.h b/drivers/mmc/core/mmc_ops.h
index f1b8e81..ad1058c 100644
--- a/drivers/mmc/core/mmc_ops.h
+++ b/drivers/mmc/core/mmc_ops.h
@@ -27,10 +27,12 @@
 int mmc_bus_test(struct mmc_card *card, u8 bus_width);
 int mmc_send_hpi_cmd(struct mmc_card *card, u32 *status);
 int mmc_can_ext_csd(struct mmc_card *card);
+int mmc_discard_queue(struct mmc_host *host, u32 tasks);
 int mmc_switch_status_error(struct mmc_host *host, u32 status);
 int __mmc_switch(struct mmc_card *card, u8 set, u8 index, u8 value,
 		unsigned int timeout_ms, bool use_busy_signal, bool send_status,
 		bool ignore_crc);
-
+int __mmc_send_status(struct mmc_card *card, u32 *status,
+				    bool ignore_crc);
 #endif
 
diff --git a/drivers/mmc/core/quirks.c b/drivers/mmc/core/quirks.c
index 4e65ea5..ba44cfd 100644
--- a/drivers/mmc/core/quirks.c
+++ b/drivers/mmc/core/quirks.c
@@ -35,6 +35,51 @@
 #define SDIO_DEVICE_ID_MARVELL_8797_F0	0x9128
 #endif
 
+#ifndef SDIO_VENDOR_ID_MSM
+#define SDIO_VENDOR_ID_MSM		0x0070
+#endif
+
+#ifndef SDIO_DEVICE_ID_MSM_WCN1314
+#define SDIO_DEVICE_ID_MSM_WCN1314	0x2881
+#endif
+
+#ifndef SDIO_VENDOR_ID_MSM_QCA
+#define SDIO_VENDOR_ID_MSM_QCA		0x271
+#endif
+
+#ifndef SDIO_DEVICE_ID_MSM_QCA_AR6003_1
+#define SDIO_DEVICE_ID_MSM_QCA_AR6003_1	0x300
+#endif
+
+#ifndef SDIO_DEVICE_ID_MSM_QCA_AR6003_2
+#define SDIO_DEVICE_ID_MSM_QCA_AR6003_2	0x301
+#endif
+
+#ifndef SDIO_DEVICE_ID_MSM_QCA_AR6004_1
+#define SDIO_DEVICE_ID_MSM_QCA_AR6004_1	0x400
+#endif
+
+#ifndef SDIO_DEVICE_ID_MSM_QCA_AR6004_2
+#define SDIO_DEVICE_ID_MSM_QCA_AR6004_2	0x401
+#endif
+
+#ifndef SDIO_VENDOR_ID_QCA6574
+#define SDIO_VENDOR_ID_QCA6574		0x271
+#endif
+
+#ifndef SDIO_DEVICE_ID_QCA6574
+#define SDIO_DEVICE_ID_QCA6574		0x50a
+#endif
+
+#ifndef SDIO_VENDOR_ID_QCA9377
+#define SDIO_VENDOR_ID_QCA9377		0x271
+#endif
+
+#ifndef SDIO_DEVICE_ID_QCA9377
+#define SDIO_DEVICE_ID_QCA9377		0x701
+#endif
+
+
 /*
  * This hook just adds a quirk for all sdio devices
  */
@@ -54,6 +99,21 @@
 	SDIO_FIXUP(SDIO_VENDOR_ID_TI, SDIO_DEVICE_ID_TI_WL1271,
 		   remove_quirk, MMC_QUIRK_BROKEN_CLK_GATING),
 
+	SDIO_FIXUP(SDIO_VENDOR_ID_MSM, SDIO_DEVICE_ID_MSM_WCN1314,
+		   remove_quirk, MMC_QUIRK_BROKEN_CLK_GATING),
+
+	SDIO_FIXUP(SDIO_VENDOR_ID_MSM_QCA, SDIO_DEVICE_ID_MSM_QCA_AR6003_1,
+		   remove_quirk, MMC_QUIRK_BROKEN_CLK_GATING),
+
+	SDIO_FIXUP(SDIO_VENDOR_ID_MSM_QCA, SDIO_DEVICE_ID_MSM_QCA_AR6003_2,
+		   remove_quirk, MMC_QUIRK_BROKEN_CLK_GATING),
+
+	SDIO_FIXUP(SDIO_VENDOR_ID_MSM_QCA, SDIO_DEVICE_ID_MSM_QCA_AR6004_1,
+		   remove_quirk, MMC_QUIRK_BROKEN_CLK_GATING),
+
+	SDIO_FIXUP(SDIO_VENDOR_ID_MSM_QCA, SDIO_DEVICE_ID_MSM_QCA_AR6004_2,
+		   remove_quirk, MMC_QUIRK_BROKEN_CLK_GATING),
+
 	SDIO_FIXUP(SDIO_VENDOR_ID_TI, SDIO_DEVICE_ID_TI_WL1271,
 		   add_quirk, MMC_QUIRK_NONSTD_FUNC_IF),
 
@@ -66,6 +126,11 @@
 	SDIO_FIXUP(SDIO_VENDOR_ID_MARVELL, SDIO_DEVICE_ID_MARVELL_8797_F0,
 		   add_quirk, MMC_QUIRK_BROKEN_IRQ_POLLING),
 
+	SDIO_FIXUP(SDIO_VENDOR_ID_QCA6574, SDIO_DEVICE_ID_QCA6574,
+		   add_quirk, MMC_QUIRK_QCA6574_SETTINGS),
+
+	SDIO_FIXUP(SDIO_VENDOR_ID_QCA9377, SDIO_DEVICE_ID_QCA9377,
+		add_quirk, MMC_QUIRK_QCA9377_SETTINGS),
 	END_FIXUP
 };
 
@@ -86,6 +151,8 @@
 		    (f->name == CID_NAME_ANY ||
 		     !strncmp(f->name, card->cid.prod_name,
 			      sizeof(card->cid.prod_name))) &&
+		    (f->ext_csd_rev == EXT_CSD_REV_ANY ||
+		     f->ext_csd_rev == card->ext_csd.rev) &&
 		    (f->cis_vendor == card->cis.vendor ||
 		     f->cis_vendor == (u16) SDIO_ANY_ID) &&
 		    (f->cis_device == card->cis.device ||
diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c
index 60542b2..7f66ad3 100644
--- a/drivers/mmc/core/sd.c
+++ b/drivers/mmc/core/sd.c
@@ -27,6 +27,12 @@
 #include "sd.h"
 #include "sd_ops.h"
 
+#define UHS_SDR104_MIN_DTR	(100 * 1000 * 1000)
+#define UHS_DDR50_MIN_DTR	(50 * 1000 * 1000)
+#define UHS_SDR50_MIN_DTR	(50 * 1000 * 1000)
+#define UHS_SDR25_MIN_DTR	(25 * 1000 * 1000)
+#define UHS_SDR12_MIN_DTR	(12.5 * 1000 * 1000)
+
 static const unsigned int tran_exp[] = {
 	10000,		100000,		1000000,	10000000,
 	0,		0,		0,		0
@@ -368,9 +374,9 @@
 		goto out;
 
 	if ((status[16] & 0xF) != 1) {
-		pr_warn("%s: Problem switching card into high-speed mode!\n",
-			mmc_hostname(card->host));
-		err = 0;
+		pr_warn("%s: Problem switching card into high-speed mode!, status:%x\n",
+			mmc_hostname(card->host), (status[16] & 0xF));
+		err = -EBUSY;
 	} else {
 		err = 1;
 	}
@@ -424,18 +430,22 @@
 	}
 
 	if ((card->host->caps & MMC_CAP_UHS_SDR104) &&
-	    (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR104)) {
+	    (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR104) &&
+	    (card->host->f_max > UHS_SDR104_MIN_DTR)) {
 			card->sd_bus_speed = UHS_SDR104_BUS_SPEED;
 	} else if ((card->host->caps & MMC_CAP_UHS_DDR50) &&
-		   (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_DDR50)) {
+		   (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_DDR50) &&
+		    (card->host->f_max > UHS_DDR50_MIN_DTR)) {
 			card->sd_bus_speed = UHS_DDR50_BUS_SPEED;
 	} else if ((card->host->caps & (MMC_CAP_UHS_SDR104 |
 		    MMC_CAP_UHS_SDR50)) && (card->sw_caps.sd3_bus_mode &
-		    SD_MODE_UHS_SDR50)) {
+		    SD_MODE_UHS_SDR50) &&
+		    (card->host->f_max > UHS_SDR50_MIN_DTR)) {
 			card->sd_bus_speed = UHS_SDR50_BUS_SPEED;
 	} else if ((card->host->caps & (MMC_CAP_UHS_SDR104 |
 		    MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR25)) &&
-		   (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR25)) {
+		   (card->sw_caps.sd3_bus_mode & SD_MODE_UHS_SDR25) &&
+		 (card->host->f_max > UHS_SDR25_MIN_DTR)) {
 			card->sd_bus_speed = UHS_SDR25_BUS_SPEED;
 	} else if ((card->host->caps & (MMC_CAP_UHS_SDR104 |
 		    MMC_CAP_UHS_SDR50 | MMC_CAP_UHS_SDR25 |
@@ -479,15 +489,17 @@
 	if (err)
 		return err;
 
-	if ((status[16] & 0xF) != card->sd_bus_speed)
-		pr_warn("%s: Problem setting bus speed mode!\n",
-			mmc_hostname(card->host));
-	else {
+	if ((status[16] & 0xF) != card->sd_bus_speed) {
+		pr_warn("%s: Problem setting bus speed mode(%u)! max_dtr:%u, timing:%u, status:%x\n",
+			mmc_hostname(card->host), card->sd_bus_speed,
+			card->sw_caps.uhs_max_dtr, timing, (status[16] & 0xF));
+		err = -EBUSY;
+	} else {
 		mmc_set_timing(card->host, timing);
 		mmc_set_clock(card->host, card->sw_caps.uhs_max_dtr);
 	}
 
-	return 0;
+	return err;
 }
 
 /* Get host's max current setting at its current voltage */
@@ -579,6 +591,64 @@
 	return 0;
 }
 
+/**
+ * mmc_sd_change_bus_speed() - Change SD card bus frequency at runtime
+ * @host: pointer to mmc host structure
+ * @freq: pointer to desired frequency to be set
+ *
+ * Change the SD card bus frequency at runtime after the card is
+ * initialized. Callers are expected to make sure of the card's
+ * state (DATA/RCV/TRANSFER) beforing changing the frequency at runtime.
+ *
+ * If the frequency to change is greater than max. supported by card,
+ * *freq is changed to max. supported by card and if it is less than min.
+ * supported by host, *freq is changed to min. supported by host.
+ */
+static int mmc_sd_change_bus_speed(struct mmc_host *host, unsigned long *freq)
+{
+	int err = 0;
+	struct mmc_card *card;
+
+	mmc_claim_host(host);
+	/*
+	 * Assign card pointer after claiming host to avoid race
+	 * conditions that may arise during removal of the card.
+	 */
+	card = host->card;
+
+	/* sanity checks */
+	if (!card || !freq) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	mmc_set_clock(host, (unsigned int) (*freq));
+
+	if (!mmc_host_is_spi(card->host) && mmc_card_uhs(card)
+			&& card->host->ops->execute_tuning) {
+		/*
+		 * We try to probe host driver for tuning for any
+		 * frequency, it is host driver responsibility to
+		 * perform actual tuning only when required.
+		 */
+		mmc_host_clk_hold(card->host);
+		err = card->host->ops->execute_tuning(card->host,
+				MMC_SEND_TUNING_BLOCK);
+		mmc_host_clk_release(card->host);
+
+		if (err) {
+			pr_warn("%s: %s: tuning execution failed %d. Restoring to previous clock %lu\n",
+				   mmc_hostname(card->host), __func__, err,
+				   host->clk_scaling.curr_freq);
+			mmc_set_clock(host, host->clk_scaling.curr_freq);
+		}
+	}
+
+out:
+	mmc_release_host(host);
+	return err;
+}
+
 /*
  * UHS-I specific initialization procedure
  */
@@ -935,7 +1005,10 @@
 {
 	unsigned max_dtr = (unsigned int)-1;
 
-	if (mmc_card_hs(card)) {
+	if (mmc_card_uhs(card)) {
+		if (max_dtr > card->sw_caps.uhs_max_dtr)
+			max_dtr = card->sw_caps.uhs_max_dtr;
+	} else if (mmc_card_hs(card)) {
 		if (max_dtr > card->sw_caps.hs_max_dtr)
 			max_dtr = card->sw_caps.hs_max_dtr;
 	} else if (max_dtr > card->csd.max_dtr) {
@@ -997,6 +1070,7 @@
 		err = mmc_send_relative_addr(host, &card->rca);
 		if (err)
 			goto free_card;
+		host->card = card;
 	}
 
 	if (!oldcard) {
@@ -1060,12 +1134,16 @@
 		}
 	}
 
-	host->card = card;
+	card->clk_scaling_highest = mmc_sd_get_max_clock(card);
+	card->clk_scaling_lowest = host->f_min;
+
 	return 0;
 
 free_card:
-	if (!oldcard)
+	if (!oldcard) {
+		host->card = NULL;
 		mmc_remove_card(card);
+	}
 
 	return err;
 }
@@ -1078,8 +1156,12 @@
 	BUG_ON(!host);
 	BUG_ON(!host->card);
 
+	mmc_exit_clk_scaling(host);
 	mmc_remove_card(host->card);
+
+	mmc_claim_host(host);
 	host->card = NULL;
+	mmc_release_host(host);
 }
 
 /*
@@ -1121,6 +1203,7 @@
 	if (!retries) {
 		printk(KERN_ERR "%s(%s): Unable to re-detect card (%d)\n",
 		       __func__, mmc_hostname(host), err);
+		err = _mmc_detect_card_removed(host);
 	}
 #else
 	err = _mmc_detect_card_removed(host);
@@ -1145,6 +1228,13 @@
 	BUG_ON(!host);
 	BUG_ON(!host->card);
 
+	err = mmc_suspend_clk_scaling(host);
+	if (err) {
+		pr_err("%s: %s: fail to suspend clock scaling (%d)\n",
+			mmc_hostname(host), __func__,  err);
+		return err;
+	}
+
 	mmc_claim_host(host);
 
 	if (mmc_card_suspended(host->card))
@@ -1170,11 +1260,13 @@
 {
 	int err;
 
+	MMC_TRACE(host, "%s: Enter\n", __func__);
 	err = _mmc_sd_suspend(host);
 	if (!err) {
 		pm_runtime_disable(&host->card->dev);
 		pm_runtime_set_suspended(&host->card->dev);
 	}
+	MMC_TRACE(host, "%s: Exit err: %d\n", __func__, err);
 
 	return err;
 }
@@ -1207,8 +1299,11 @@
 		if (err) {
 			printk(KERN_ERR "%s: Re-init card rc = %d (retries = %d)\n",
 			       mmc_hostname(host), err, retries);
-			mdelay(5);
 			retries--;
+			mmc_power_off(host);
+			usleep_range(5000, 5500);
+			mmc_power_up(host, host->card->ocr);
+			mmc_select_voltage(host, host->card->ocr);
 			continue;
 		}
 		break;
@@ -1218,6 +1313,13 @@
 #endif
 	mmc_card_clr_suspended(host->card);
 
+	err = mmc_resume_clk_scaling(host);
+	if (err) {
+		pr_err("%s: %s: fail to resume clock scaling (%d)\n",
+			mmc_hostname(host), __func__, err);
+		goto out;
+	}
+
 out:
 	mmc_release_host(host);
 	return err;
@@ -1228,7 +1330,12 @@
  */
 static int mmc_sd_resume(struct mmc_host *host)
 {
+	int err = 0;
+
+	MMC_TRACE(host, "%s: Enter\n", __func__);
 	pm_runtime_enable(&host->card->dev);
+
+	MMC_TRACE(host, "%s: Exit err: %d\n", __func__, err);
 	return 0;
 }
 
@@ -1279,7 +1386,7 @@
 	.suspend = mmc_sd_suspend,
 	.resume = mmc_sd_resume,
 	.alive = mmc_sd_alive,
-	.shutdown = mmc_sd_suspend,
+	.change_bus_speed = mmc_sd_change_bus_speed,
 	.reset = mmc_sd_reset,
 };
 
@@ -1335,6 +1442,10 @@
 		err = mmc_sd_init_card(host, rocr, NULL);
 		if (err) {
 			retries--;
+			mmc_power_off(host);
+			usleep_range(5000, 5500);
+			mmc_power_up(host, rocr);
+			mmc_select_voltage(host, rocr);
 			continue;
 		}
 		break;
@@ -1357,6 +1468,13 @@
 		goto remove_card;
 
 	mmc_claim_host(host);
+
+	err = mmc_init_clk_scaling(host);
+	if (err) {
+		mmc_release_host(host);
+		goto remove_card;
+	}
+
 	return 0;
 
 remove_card:
diff --git a/drivers/mmc/core/sdio.c b/drivers/mmc/core/sdio.c
index 8e10bdc..9ebe730 100644
--- a/drivers/mmc/core/sdio.c
+++ b/drivers/mmc/core/sdio.c
@@ -186,6 +186,23 @@
 				card->sw_caps.sd3_drv_type |= SD_DRIVER_TYPE_C;
 			if (data & SDIO_DRIVE_SDTD)
 				card->sw_caps.sd3_drv_type |= SD_DRIVER_TYPE_D;
+
+			ret = mmc_io_rw_direct(card, 0, 0,
+				SDIO_CCCR_INTERRUPT_EXTENSION, 0, &data);
+			if (ret)
+				goto out;
+			if (data & SDIO_SUPPORT_ASYNC_INTR) {
+				if (card->host->caps2 &
+				    MMC_CAP2_ASYNC_SDIO_IRQ_4BIT_MODE) {
+					data |= SDIO_ENABLE_ASYNC_INTR;
+					ret = mmc_io_rw_direct(card, 1, 0,
+						SDIO_CCCR_INTERRUPT_EXTENSION,
+						data, NULL);
+					if (ret)
+						goto out;
+					card->cccr.async_intr_sup = 1;
+				}
+			}
 		}
 
 		/* if no uhs mode ensure we check for high speed */
@@ -204,12 +221,60 @@
 	return ret;
 }
 
+static void sdio_enable_vendor_specific_settings(struct mmc_card *card)
+{
+	int ret;
+	u8 settings;
+
+	if (mmc_enable_qca6574_settings(card) ||
+		mmc_enable_qca9377_settings(card)) {
+		ret = mmc_io_rw_direct(card, 1, 0, 0xF2, 0x0F, NULL);
+		if (ret) {
+			pr_crit("%s: failed to write to fn 0xf2 %d\n",
+					mmc_hostname(card->host), ret);
+			goto out;
+		}
+
+		ret = mmc_io_rw_direct(card, 0, 0, 0xF1, 0, &settings);
+		if (ret) {
+			pr_crit("%s: failed to read fn 0xf1 %d\n",
+					mmc_hostname(card->host), ret);
+			goto out;
+		}
+
+		settings |= 0x80;
+		ret = mmc_io_rw_direct(card, 1, 0, 0xF1, settings, NULL);
+		if (ret) {
+			pr_crit("%s: failed to write to fn 0xf1 %d\n",
+					mmc_hostname(card->host), ret);
+			goto out;
+		}
+
+		ret = mmc_io_rw_direct(card, 0, 0, 0xF0, 0, &settings);
+		if (ret) {
+			pr_crit("%s: failed to read fn 0xf0 %d\n",
+					mmc_hostname(card->host), ret);
+			goto out;
+		}
+
+		settings |= 0x20;
+		ret = mmc_io_rw_direct(card, 1, 0, 0xF0, settings, NULL);
+		if (ret) {
+			pr_crit("%s: failed to write to fn 0xf0 %d\n",
+					mmc_hostname(card->host), ret);
+			goto out;
+		}
+	}
+out:
+	return;
+}
+
 static int sdio_enable_wide(struct mmc_card *card)
 {
 	int ret;
 	u8 ctrl;
 
-	if (!(card->host->caps & MMC_CAP_4_BIT_DATA))
+	if (!(card->host->caps & (MMC_CAP_4_BIT_DATA | MMC_CAP_8_BIT_DATA)))
 		return 0;
 
 	if (card->cccr.low_speed && !card->cccr.wide_bus)
@@ -225,7 +290,10 @@
 
 	/* set as 4-bit bus width */
 	ctrl &= ~SDIO_BUS_WIDTH_MASK;
-	ctrl |= SDIO_BUS_WIDTH_4BIT;
+	if (card->host->caps & MMC_CAP_8_BIT_DATA)
+		ctrl |= SDIO_BUS_WIDTH_8BIT;
+	else if (card->host->caps & MMC_CAP_4_BIT_DATA)
+		ctrl |= SDIO_BUS_WIDTH_4BIT;
 
 	ret = mmc_io_rw_direct(card, 1, 0, SDIO_CCCR_IF, ctrl, NULL);
 	if (ret)
@@ -266,7 +334,7 @@
 	int ret;
 	u8 ctrl;
 
-	if (!(card->host->caps & MMC_CAP_4_BIT_DATA))
+	if (!(card->host->caps & (MMC_CAP_4_BIT_DATA | MMC_CAP_8_BIT_DATA)))
 		return 0;
 
 	if (card->cccr.low_speed && !card->cccr.wide_bus)
@@ -276,10 +344,10 @@
 	if (ret)
 		return ret;
 
-	if (!(ctrl & SDIO_BUS_WIDTH_4BIT))
+	if (!(ctrl & (SDIO_BUS_WIDTH_4BIT | SDIO_BUS_WIDTH_8BIT)))
 		return 0;
 
-	ctrl &= ~SDIO_BUS_WIDTH_4BIT;
+	ctrl &= ~(SDIO_BUS_WIDTH_4BIT | SDIO_BUS_WIDTH_8BIT);
 	ctrl |= SDIO_BUS_ASYNC_INT;
 
 	ret = mmc_io_rw_direct(card, 1, 0, SDIO_CCCR_IF, ctrl, NULL);
@@ -496,6 +564,9 @@
 	if (err)
 		return err;
 
+	/* Vendor specific settings based on card quirks */
+	sdio_enable_vendor_specific_settings(card);
+
 	speed &= ~SDIO_SPEED_BSS_MASK;
 	speed |= bus_speed;
 	err = mmc_io_rw_direct(card, 1, 0, SDIO_CCCR_SPEED, speed, NULL);
@@ -622,8 +693,11 @@
 	/*
 	 * Call the optional HC's init_card function to handle quirks.
 	 */
-	if (host->ops->init_card)
+	if (host->ops->init_card) {
+		mmc_host_clk_hold(host);
 		host->ops->init_card(host, card);
+		mmc_host_clk_release(host);
+	}
 
 	/*
 	 * If the host and card support UHS-I mode request the card
@@ -790,7 +864,12 @@
 		 * Switch to wider bus (if supported).
 		 */
 		err = sdio_enable_4bit_bus(card);
-		if (err)
+		if (err > 0) {
+			if (card->host->caps & MMC_CAP_8_BIT_DATA)
+				mmc_set_bus_width(card->host, MMC_BUS_WIDTH_8);
+			else if (card->host->caps & MMC_CAP_4_BIT_DATA)
+				mmc_set_bus_width(card->host, MMC_BUS_WIDTH_4);
+		} else if (err)
 			goto remove;
 	}
 finish:
@@ -917,6 +996,7 @@
  */
 static int mmc_sdio_suspend(struct mmc_host *host)
 {
+	MMC_TRACE(host, "%s: Enter\n", __func__);
 	mmc_claim_host(host);
 
 	if (mmc_card_keep_power(host) && mmc_card_wake_sdio_irq(host))
@@ -924,13 +1004,15 @@
 
 	if (!mmc_card_keep_power(host)) {
 		mmc_power_off(host);
+	} else if (host->ios.clock) {
+		mmc_gate_clock(host);
 	} else if (host->retune_period) {
 		mmc_retune_timer_stop(host);
 		mmc_retune_needed(host);
 	}
 
 	mmc_release_host(host);
-
+	MMC_TRACE(host, "%s: Exit\n", __func__);
 	return 0;
 }
 
@@ -941,6 +1023,7 @@
 	BUG_ON(!host);
 	BUG_ON(!host->card);
 
+	MMC_TRACE(host, "%s: Enter\n", __func__);
 	/* Basic card reinitialization. */
 	mmc_claim_host(host);
 
@@ -973,6 +1056,13 @@
 	} else if (mmc_card_keep_power(host) && mmc_card_wake_sdio_irq(host)) {
 		/* We may have switched to 1-bit mode during suspend */
 		err = sdio_enable_4bit_bus(host->card);
+		if (err > 0) {
+			if (host->caps & MMC_CAP_8_BIT_DATA)
+				mmc_set_bus_width(host, MMC_BUS_WIDTH_8);
+			else if (host->caps & MMC_CAP_4_BIT_DATA)
+				mmc_set_bus_width(host, MMC_BUS_WIDTH_4);
+			err = 0;
+		}
 	}
 
 	if (!err && host->sdio_irqs) {
@@ -988,6 +1078,8 @@
 	mmc_release_host(host);
 
 	host->pm_flags &= ~MMC_PM_KEEP_POWER;
+	host->pm_flags &= ~MMC_PM_WAKE_SDIO_IRQ;
+	MMC_TRACE(host, "%s: Exit err: %d\n", __func__, err);
 	return err;
 }
 
@@ -1222,40 +1314,6 @@
 
 int sdio_reset_comm(struct mmc_card *card)
 {
-	struct mmc_host *host = card->host;
-	u32 ocr;
-	u32 rocr;
-	int err;
-
-	printk("%s():\n", __func__);
-	mmc_claim_host(host);
-
-	mmc_retune_disable(host);
-
-	mmc_go_idle(host);
-
-	mmc_set_clock(host, host->f_min);
-
-	err = mmc_send_io_op_cond(host, 0, &ocr);
-	if (err)
-		goto err;
-
-	rocr = mmc_select_voltage(host, ocr);
-	if (!rocr) {
-		err = -EINVAL;
-		goto err;
-	}
-
-	err = mmc_sdio_init_card(host, rocr, card, 0);
-	if (err)
-		goto err;
-
-	mmc_release_host(host);
-	return 0;
-err:
-	printk("%s: Error resetting SDIO communications (%d)\n",
-	       mmc_hostname(host), err);
-	mmc_release_host(host);
-	return err;
+	return mmc_power_restore_host(card->host);
 }
 EXPORT_SYMBOL(sdio_reset_comm);
diff --git a/drivers/mmc/core/sdio_cis.c b/drivers/mmc/core/sdio_cis.c
index dcb3dee..5d7f198 100644
--- a/drivers/mmc/core/sdio_cis.c
+++ b/drivers/mmc/core/sdio_cis.c
@@ -55,7 +55,7 @@
 
 	for (i = 0; i < nr_strings; i++) {
 		buffer[i] = string;
-		strcpy(string, buf);
+		strlcpy(string, buf, strlen(buf) + 1);
 		string += strlen(string) + 1;
 		buf += strlen(buf) + 1;
 	}
@@ -276,8 +276,16 @@
 			break;
 
 		/* null entries have no link field or data */
-		if (tpl_code == 0x00)
-			continue;
+		if (tpl_code == 0x00) {
+			if (card->cis.vendor == 0x70 &&
+				(card->cis.device == 0x2460 ||
+				 card->cis.device == 0x0460 ||
+				 card->cis.device == 0x23F1 ||
+				 card->cis.device == 0x23F0))
+				break;
+			else
+				continue;
+		}
 
 		ret = mmc_io_rw_direct(card, 0, 0, ptr++, 0, &tpl_link);
 		if (ret)
diff --git a/drivers/mmc/core/sdio_irq.c b/drivers/mmc/core/sdio_irq.c
index 09cc67d..95589d1 100644
--- a/drivers/mmc/core/sdio_irq.c
+++ b/drivers/mmc/core/sdio_irq.c
@@ -93,7 +93,9 @@
 {
 	mmc_claim_host(host);
 	host->sdio_irq_pending = true;
+	mmc_host_clk_hold(host);
 	process_sdio_pending_irqs(host);
+	mmc_host_clk_release(host);
 	mmc_release_host(host);
 }
 EXPORT_SYMBOL_GPL(sdio_run_irqs);
@@ -104,6 +106,7 @@
 	struct sched_param param = { .sched_priority = 1 };
 	unsigned long period, idle_period;
 	int ret;
+	bool ws;
 
 	sched_setscheduler(current, SCHED_FIFO, &param);
 
@@ -137,6 +140,17 @@
 		ret = __mmc_claim_host(host, &host->sdio_irq_thread_abort);
 		if (ret)
 			break;
+		ws = false;
+		/*
+		 * prevent suspend if it has started when scheduled;
+		 * 100 msec (approx. value) should be enough for the system to
+		 * resume and attend to the card's request
+		 */
+		if ((host->dev_status == DEV_SUSPENDING) ||
+		    (host->dev_status == DEV_SUSPENDED)) {
+			pm_wakeup_event(&host->card->dev, 100);
+			ws = true;
+		}
 		ret = process_sdio_pending_irqs(host);
 		host->sdio_irq_pending = false;
 		mmc_release_host(host);
@@ -173,6 +187,12 @@
 			host->ops->enable_sdio_irq(host, 1);
 			mmc_host_clk_release(host);
 		}
+		/*
+		 * function drivers would have processed the event from card
+		 * unless suspended, hence release wake source
+		 */
+		if (ws && (host->dev_status == DEV_RESUMED))
+			pm_relax(&host->card->dev);
 		if (!kthread_should_stop())
 			schedule_timeout(period);
 		set_current_state(TASK_RUNNING);
diff --git a/drivers/mmc/host/Kconfig b/drivers/mmc/host/Kconfig
index defb66e..515abb2 100644
--- a/drivers/mmc/host/Kconfig
+++ b/drivers/mmc/host/Kconfig
@@ -399,6 +399,8 @@
 	tristate "Qualcomm Technologies, Inc. SDHCI Controller Support"
 	depends on ARCH_QCOM || ARCH_MSM || (ARM && COMPILE_TEST)
 	depends on MMC_SDHCI_PLTFM
+	select PM_DEVFREQ
+	select DEVFREQ_GOV_SIMPLE_ONDEMAND
 	help
 	  This selects the Secure Digital Host Controller Interface (SDHCI)
 	  support present in Qualcomm Technologies, Inc. SOCs. The controller
@@ -408,6 +410,17 @@
 
 	  If unsure, say N.
 
+config MMC_SDHCI_MSM_ICE
+	bool "Qualcomm Technologies, Inc Inline Crypto Engine for SDHCI core"
+	depends on MMC_SDHCI_MSM && CRYPTO_DEV_QCOM_ICE
+	help
+	  This selects the QTI specific additions to support Inline Crypto
+	  Engine (ICE). ICE accelerates the crypto operations and maintains
+	  the high SDHCI performance.
+
+	  Select this if you have ICE supported for SDHCI on QTI chipset.
+	  If unsure, say N.
+
 config MMC_MSM
 	tristate "Qualcomm SDCC Controller Support"
 	depends on MMC && (ARCH_MSM7X00A || ARCH_MSM7X30 || ARCH_QSD8X50)
@@ -772,6 +785,19 @@
 	  This selects support for the SD/MMC Host Controller on
 	  Allwinner sunxi SoCs.
 
+config MMC_CQ_HCI
+	tristate "Command Queue Support"
+	depends on HAS_DMA
+	help
+	  This selects the Command Queue Host Controller Interface (CQHCI)
+	  support present in host controllers of Qualcomm Technologies, Inc
+	  amongst others.
+	  This controller supports eMMC devices with command queue support.
+
+	  If you have a controller with this interface, say Y or M here.
+
+	  If unsure, say N.
+
 config MMC_TOSHIBA_PCI
 	tristate "Toshiba Type A SD/MMC Card Interface Driver"
 	depends on PCI
diff --git a/drivers/mmc/host/Makefile b/drivers/mmc/host/Makefile
index ef56624..64defc5 100644
--- a/drivers/mmc/host/Makefile
+++ b/drivers/mmc/host/Makefile
@@ -72,8 +72,10 @@
 obj-$(CONFIG_MMC_SDHCI_OF_HLWD)		+= sdhci-of-hlwd.o
 obj-$(CONFIG_MMC_SDHCI_BCM_KONA)	+= sdhci-bcm-kona.o
 obj-$(CONFIG_MMC_SDHCI_MSM)		+= sdhci-msm.o
+obj-$(CONFIG_MMC_SDHCI_MSM_ICE)		+= sdhci-msm-ice.o
 obj-$(CONFIG_MMC_SDHCI_IPROC)		+= sdhci-iproc.o
 obj-$(CONFIG_MMC_SDHCI_ST)		+= sdhci-st.o
+obj-$(CONFIG_MMC_CQ_HCI)		+= cmdq_hci.o
 obj-$(CONFIG_MMC_SDHCI_MICROCHIP_PIC32)	+= sdhci-pic32.o
 obj-$(CONFIG_MMC_SDHCI_BRCMSTB)		+= sdhci-brcmstb.o
 
diff --git a/drivers/mmc/host/cmdq_hci.c b/drivers/mmc/host/cmdq_hci.c
new file mode 100644
index 0000000..77c5ca3
--- /dev/null
+++ b/drivers/mmc/host/cmdq_hci.c
@@ -0,0 +1,1197 @@
+/* Copyright (c) 2015-2017 The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/delay.h>
+#include <linux/highmem.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/dma-mapping.h>
+#include <linux/slab.h>
+#include <linux/scatterlist.h>
+#include <linux/platform_device.h>
+#include <linux/blkdev.h>
+
+#include <linux/mmc/mmc.h>
+#include <linux/mmc/host.h>
+#include <linux/mmc/card.h>
+#include <linux/pm_runtime.h>
+#include <linux/workqueue.h>
+
+#include "cmdq_hci.h"
+#include "sdhci.h"
+#include "sdhci-msm.h"
+
+#define DCMD_SLOT 31
+#define NUM_SLOTS 32
+
+/* 10 sec */
+#define HALT_TIMEOUT_MS 10000
+
+static int cmdq_halt_poll(struct mmc_host *mmc, bool halt);
+static int cmdq_halt(struct mmc_host *mmc, bool halt);
+
+#ifdef CONFIG_PM_RUNTIME
+static int cmdq_runtime_pm_get(struct cmdq_host *host)
+{
+	return pm_runtime_get_sync(host->mmc->parent);
+}
+static int cmdq_runtime_pm_put(struct cmdq_host *host)
+{
+	pm_runtime_mark_last_busy(host->mmc->parent);
+	return pm_runtime_put_autosuspend(host->mmc->parent);
+}
+#else
+static inline int cmdq_runtime_pm_get(struct cmdq_host *host)
+{
+	return 0;
+}
+static inline int cmdq_runtime_pm_put(struct cmdq_host *host)
+{
+	return 0;
+}
+#endif
+static inline struct mmc_request *get_req_by_tag(struct cmdq_host *cq_host,
+					  unsigned int tag)
+{
+	return cq_host->mrq_slot[tag];
+}
+
+static inline u8 *get_desc(struct cmdq_host *cq_host, u8 tag)
+{
+	return cq_host->desc_base + (tag * cq_host->slot_sz);
+}
+
+static inline u8 *get_link_desc(struct cmdq_host *cq_host, u8 tag)
+{
+	u8 *desc = get_desc(cq_host, tag);
+
+	return desc + cq_host->task_desc_len;
+}
+
+static inline dma_addr_t get_trans_desc_dma(struct cmdq_host *cq_host, u8 tag)
+{
+	return cq_host->trans_desc_dma_base +
+		(cq_host->mmc->max_segs * tag *
+		 cq_host->trans_desc_len);
+}
+
+static inline u8 *get_trans_desc(struct cmdq_host *cq_host, u8 tag)
+{
+	return cq_host->trans_desc_base +
+		(cq_host->trans_desc_len * cq_host->mmc->max_segs * tag);
+}
+
+static void setup_trans_desc(struct cmdq_host *cq_host, u8 tag)
+{
+	u8 *link_temp;
+	dma_addr_t trans_temp;
+
+	link_temp = get_link_desc(cq_host, tag);
+	trans_temp = get_trans_desc_dma(cq_host, tag);
+
+	memset(link_temp, 0, cq_host->link_desc_len);
+	if (cq_host->link_desc_len > 8)
+		*(link_temp + 8) = 0;
+
+	if (tag == DCMD_SLOT) {
+		*link_temp = VALID(0) | ACT(0) | END(1);
+		return;
+	}
+
+	*link_temp = VALID(1) | ACT(0x6) | END(0);
+
+	if (cq_host->dma64) {
+		__le64 *data_addr = (__le64 __force *)(link_temp + 4);
+		data_addr[0] = cpu_to_le64(trans_temp);
+	} else {
+		__le32 *data_addr = (__le32 __force *)(link_temp + 4);
+		data_addr[0] = cpu_to_le32(trans_temp);
+	}
+}
+
+static void cmdq_set_halt_irq(struct cmdq_host *cq_host, bool enable)
+{
+	u32 ier;
+
+	ier = cmdq_readl(cq_host, CQISTE);
+	if (enable) {
+		cmdq_writel(cq_host, ier | HALT, CQISTE);
+		cmdq_writel(cq_host, ier | HALT, CQISGE);
+	} else {
+		cmdq_writel(cq_host, ier & ~HALT, CQISTE);
+		cmdq_writel(cq_host, ier & ~HALT, CQISGE);
+	}
+}
+
+static void cmdq_clear_set_irqs(struct cmdq_host *cq_host, u32 clear, u32 set)
+{
+	u32 ier;
+
+	ier = cmdq_readl(cq_host, CQISTE);
+	ier &= ~clear;
+	ier |= set;
+	cmdq_writel(cq_host, ier, CQISTE);
+	cmdq_writel(cq_host, ier, CQISGE);
+	/* ensure the writes are done */
+	mb();
+}
+
+
+#define DRV_NAME "cmdq-host"
+
+static void cmdq_dump_task_history(struct cmdq_host *cq_host)
+{
+	int i;
+
+	if (likely(!cq_host->mmc->cmdq_thist_enabled))
+		return;
+
+	if (!cq_host->thist) {
+		pr_err("%s: %s: CMDQ task history buffer not allocated\n",
+			mmc_hostname(cq_host->mmc), __func__);
+		return;
+	}
+
+	pr_err("---- Circular Task History ----\n");
+	pr_err(DRV_NAME ": Last entry index: %d", cq_host->thist_idx - 1);
+
+	for (i = 0; i < cq_host->num_slots; i++) {
+		pr_err(DRV_NAME ": [%02d]%s Task: 0x%08x | Args: 0x%08x\n", i,
+			(cq_host->thist[i].is_dcmd) ? "DCMD" : "DATA",
+			lower_32_bits(cq_host->thist[i].task),
+			upper_32_bits(cq_host->thist[i].task));
+	}
+	pr_err("-------------------------\n");
+}
+
+static void cmdq_dump_adma_mem(struct cmdq_host *cq_host)
+{
+	struct mmc_host *mmc = cq_host->mmc;
+	dma_addr_t desc_dma;
+	int tag = 0;
+	unsigned long data_active_reqs =
+		mmc->cmdq_ctx.data_active_reqs;
+	unsigned long desc_size =
+		(cq_host->mmc->max_segs * cq_host->trans_desc_len);
+
+	for_each_set_bit(tag, &data_active_reqs, cq_host->num_slots) {
+		desc_dma = get_trans_desc_dma(cq_host, tag);
+		pr_err("%s: %s: tag = %d, trans_dma(phys) = %pad, trans_desc(virt) = 0x%p\n",
+				mmc_hostname(mmc), __func__, tag,
+				&desc_dma, get_trans_desc(cq_host, tag));
+		print_hex_dump(KERN_ERR, "cmdq-adma:", DUMP_PREFIX_ADDRESS,
+				32, 8, get_trans_desc(cq_host, tag),
+				(desc_size), false);
+	}
+}
+
+static void cmdq_dumpregs(struct cmdq_host *cq_host)
+{
+	struct mmc_host *mmc = cq_host->mmc;
+
+	MMC_TRACE(mmc,
+	"%s: 0x0C=0x%08x 0x10=0x%08x 0x14=0x%08x 0x18=0x%08x 0x28=0x%08x 0x2C=0x%08x 0x30=0x%08x 0x34=0x%08x 0x54=0x%08x 0x58=0x%08x 0x5C=0x%08x 0x48=0x%08x\n",
+	__func__, cmdq_readl(cq_host, CQCTL), cmdq_readl(cq_host, CQIS),
+	cmdq_readl(cq_host, CQISTE), cmdq_readl(cq_host, CQISGE),
+	cmdq_readl(cq_host, CQTDBR), cmdq_readl(cq_host, CQTCN),
+	cmdq_readl(cq_host, CQDQS), cmdq_readl(cq_host, CQDPT),
+	cmdq_readl(cq_host, CQTERRI), cmdq_readl(cq_host, CQCRI),
+	cmdq_readl(cq_host, CQCRA), cmdq_readl(cq_host, CQCRDCT));
+	pr_err(DRV_NAME ": ========== REGISTER DUMP (%s)==========\n",
+		mmc_hostname(mmc));
+
+	pr_err(DRV_NAME ": Caps: 0x%08x		  | Version:  0x%08x\n",
+		cmdq_readl(cq_host, CQCAP),
+		cmdq_readl(cq_host, CQVER));
+	pr_err(DRV_NAME ": Queing config: 0x%08x  | Queue Ctrl:  0x%08x\n",
+		cmdq_readl(cq_host, CQCFG),
+		cmdq_readl(cq_host, CQCTL));
+	pr_err(DRV_NAME ": Int stat: 0x%08x	  | Int enab:  0x%08x\n",
+		cmdq_readl(cq_host, CQIS),
+		cmdq_readl(cq_host, CQISTE));
+	pr_err(DRV_NAME ": Int sig: 0x%08x	  | Int Coal:  0x%08x\n",
+		cmdq_readl(cq_host, CQISGE),
+		cmdq_readl(cq_host, CQIC));
+	pr_err(DRV_NAME ": TDL base: 0x%08x	  | TDL up32:  0x%08x\n",
+		cmdq_readl(cq_host, CQTDLBA),
+		cmdq_readl(cq_host, CQTDLBAU));
+	pr_err(DRV_NAME ": Doorbell: 0x%08x	  | Comp Notif:  0x%08x\n",
+		cmdq_readl(cq_host, CQTDBR),
+		cmdq_readl(cq_host, CQTCN));
+	pr_err(DRV_NAME ": Dev queue: 0x%08x	  | Dev Pend:  0x%08x\n",
+		cmdq_readl(cq_host, CQDQS),
+		cmdq_readl(cq_host, CQDPT));
+	pr_err(DRV_NAME ": Task clr: 0x%08x	  | Send stat 1:  0x%08x\n",
+		cmdq_readl(cq_host, CQTCLR),
+		cmdq_readl(cq_host, CQSSC1));
+	pr_err(DRV_NAME ": Send stat 2: 0x%08x	  | DCMD resp:  0x%08x\n",
+		cmdq_readl(cq_host, CQSSC2),
+		cmdq_readl(cq_host, CQCRDCT));
+	pr_err(DRV_NAME ": Resp err mask: 0x%08x  | Task err:  0x%08x\n",
+		cmdq_readl(cq_host, CQRMEM),
+		cmdq_readl(cq_host, CQTERRI));
+	pr_err(DRV_NAME ": Resp idx 0x%08x	  | Resp arg:  0x%08x\n",
+		cmdq_readl(cq_host, CQCRI),
+		cmdq_readl(cq_host, CQCRA));
+	pr_err(DRV_NAME": Vendor cfg 0x%08x\n",
+	       cmdq_readl(cq_host, CQ_VENDOR_CFG));
+	pr_err(DRV_NAME ": ===========================================\n");
+
+	cmdq_dump_task_history(cq_host);
+	if (cq_host->ops->dump_vendor_regs)
+		cq_host->ops->dump_vendor_regs(mmc);
+}
+
+/**
+ * The allocated descriptor table for task, link & transfer descritors
+ * looks like:
+ * |----------|
+ * |task desc |  |->|----------|
+ * |----------|  |  |trans desc|
+ * |link desc-|->|  |----------|
+ * |----------|          .
+ *      .                .
+ *  no. of slots      max-segs
+ *      .           |----------|
+ * |----------|
+ * The idea here is to create the [task+trans] table and mark & point the
+ * link desc to the transfer desc table on a per slot basis.
+ */
+static int cmdq_host_alloc_tdl(struct cmdq_host *cq_host)
+{
+
+	size_t desc_size;
+	size_t data_size;
+	int i = 0;
+
+	/* task descriptor can be 64/128 bit irrespective of arch */
+	if (cq_host->caps & CMDQ_TASK_DESC_SZ_128) {
+		cmdq_writel(cq_host, cmdq_readl(cq_host, CQCFG) |
+			       CQ_TASK_DESC_SZ, CQCFG);
+		cq_host->task_desc_len = 16;
+	} else {
+		cq_host->task_desc_len = 8;
+	}
+
+	/*
+	 * 96 bits length of transfer desc instead of 128 bits which means
+	 * ADMA would expect next valid descriptor at the 96th bit
+	 * or 128th bit
+	 */
+	if (cq_host->dma64) {
+		if (cq_host->quirks & CMDQ_QUIRK_SHORT_TXFR_DESC_SZ)
+			cq_host->trans_desc_len = 12;
+		else
+			cq_host->trans_desc_len = 16;
+		cq_host->link_desc_len = 16;
+	} else {
+		cq_host->trans_desc_len = 8;
+		cq_host->link_desc_len = 8;
+	}
+
+	/* total size of a slot: 1 task & 1 transfer (link) */
+	cq_host->slot_sz = cq_host->task_desc_len + cq_host->link_desc_len;
+
+	desc_size = cq_host->slot_sz * cq_host->num_slots;
+
+	data_size = cq_host->trans_desc_len * cq_host->mmc->max_segs *
+		(cq_host->num_slots - 1);
+
+	pr_info("%s: desc_size: %d data_sz: %d slot-sz: %d\n", __func__,
+		(int)desc_size, (int)data_size, cq_host->slot_sz);
+
+	/*
+	 * allocate a dma-mapped chunk of memory for the descriptors
+	 * allocate a dma-mapped chunk of memory for link descriptors
+	 * setup each link-desc memory offset per slot-number to
+	 * the descriptor table.
+	 */
+	cq_host->desc_base = dmam_alloc_coherent(mmc_dev(cq_host->mmc),
+						 desc_size,
+						 &cq_host->desc_dma_base,
+						 GFP_KERNEL);
+	cq_host->trans_desc_base = dmam_alloc_coherent(mmc_dev(cq_host->mmc),
+					      data_size,
+					      &cq_host->trans_desc_dma_base,
+					      GFP_KERNEL);
+	cq_host->thist = devm_kzalloc(mmc_dev(cq_host->mmc),
+					(sizeof(*cq_host->thist) *
+						cq_host->num_slots),
+					GFP_KERNEL);
+	if (!cq_host->desc_base || !cq_host->trans_desc_base)
+		return -ENOMEM;
+
+	pr_info("desc-base: 0x%p trans-base: 0x%p\n desc_dma 0x%llx trans_dma: 0x%llx\n",
+		 cq_host->desc_base, cq_host->trans_desc_base,
+		(unsigned long long)cq_host->desc_dma_base,
+		(unsigned long long) cq_host->trans_desc_dma_base);
+
+	for (; i < (cq_host->num_slots); i++)
+		setup_trans_desc(cq_host, i);
+
+	return 0;
+}
+
+static int cmdq_enable(struct mmc_host *mmc)
+{
+	int err = 0;
+	u32 cqcfg;
+	bool dcmd_enable;
+	struct cmdq_host *cq_host = mmc_cmdq_private(mmc);
+
+	if (!cq_host || !mmc->card || !mmc_card_cmdq(mmc->card)) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (cq_host->enabled)
+		goto out;
+
+	cmdq_runtime_pm_get(cq_host);
+	cqcfg = cmdq_readl(cq_host, CQCFG);
+	if (cqcfg & 0x1) {
+		pr_info("%s: %s: cq_host is already enabled\n",
+				mmc_hostname(mmc), __func__);
+		WARN_ON(1);
+		goto pm_ref_count;
+	}
+
+	if (cq_host->quirks & CMDQ_QUIRK_NO_DCMD)
+		dcmd_enable = false;
+	else
+		dcmd_enable = true;
+
+	cqcfg = ((cq_host->caps & CMDQ_TASK_DESC_SZ_128 ? CQ_TASK_DESC_SZ : 0) |
+			(dcmd_enable ? CQ_DCMD : 0));
+
+	cmdq_writel(cq_host, cqcfg, CQCFG);
+	/* enable CQ_HOST */
+	cmdq_writel(cq_host, cmdq_readl(cq_host, CQCFG) | CQ_ENABLE,
+		    CQCFG);
+
+	if (!cq_host->desc_base ||
+			!cq_host->trans_desc_base) {
+		err = cmdq_host_alloc_tdl(cq_host);
+		if (err)
+			goto pm_ref_count;
+	}
+
+	cmdq_writel(cq_host, lower_32_bits(cq_host->desc_dma_base), CQTDLBA);
+	cmdq_writel(cq_host, upper_32_bits(cq_host->desc_dma_base), CQTDLBAU);
+
+	/*
+	 * disable all vendor interrupts
+	 * enable CMDQ interrupts
+	 * enable the vendor error interrupts
+	 */
+	if (cq_host->ops->clear_set_irqs)
+		cq_host->ops->clear_set_irqs(mmc, true);
+
+	cmdq_clear_set_irqs(cq_host, 0x0, CQ_INT_ALL);
+
+	/* cq_host would use this rca to address the card */
+	cmdq_writel(cq_host, mmc->card->rca, CQSSC2);
+
+	/* send QSR at lesser intervals than the default */
+	cmdq_writel(cq_host, SEND_QSR_INTERVAL, CQSSC1);
+
+	/* enable bkops exception indication */
+	if (mmc_card_configured_manual_bkops(mmc->card) &&
+	    !mmc_card_configured_auto_bkops(mmc->card))
+		cmdq_writel(cq_host, cmdq_readl(cq_host, CQRMEM) | CQ_EXCEPTION,
+				CQRMEM);
+
+	/* ensure the writes are done before enabling CQE */
+	mb();
+
+	cq_host->enabled = true;
+	mmc_host_clr_cq_disable(mmc);
+
+	if (cq_host->ops->set_transfer_params)
+		cq_host->ops->set_transfer_params(mmc);
+
+	if (cq_host->ops->set_block_size)
+		cq_host->ops->set_block_size(cq_host->mmc);
+
+	if (cq_host->ops->set_data_timeout)
+		cq_host->ops->set_data_timeout(mmc, 0xf);
+
+	if (cq_host->ops->clear_set_dumpregs)
+		cq_host->ops->clear_set_dumpregs(mmc, 1);
+
+	if (cq_host->ops->enhanced_strobe_mask)
+		cq_host->ops->enhanced_strobe_mask(mmc, true);
+
+pm_ref_count:
+	cmdq_runtime_pm_put(cq_host);
+out:
+	MMC_TRACE(mmc, "%s: CQ enabled err: %d\n", __func__, err);
+	return err;
+}
+
+static void cmdq_disable_nosync(struct mmc_host *mmc, bool soft)
+{
+	struct cmdq_host *cq_host = (struct cmdq_host *)mmc_cmdq_private(mmc);
+
+	if (soft) {
+		cmdq_writel(cq_host, cmdq_readl(
+				    cq_host, CQCFG) & ~(CQ_ENABLE),
+			    CQCFG);
+	}
+	if (cq_host->ops->enhanced_strobe_mask)
+		cq_host->ops->enhanced_strobe_mask(mmc, false);
+
+	cq_host->enabled = false;
+	mmc_host_set_cq_disable(mmc);
+	MMC_TRACE(mmc, "%s: CQ disabled\n", __func__);
+}
+
+static void cmdq_disable(struct mmc_host *mmc, bool soft)
+{
+	struct cmdq_host *cq_host = (struct cmdq_host *)mmc_cmdq_private(mmc);
+
+	cmdq_runtime_pm_get(cq_host);
+	cmdq_disable_nosync(mmc, soft);
+	cmdq_runtime_pm_put(cq_host);
+}
+
+static void cmdq_reset(struct mmc_host *mmc, bool soft)
+{
+	struct cmdq_host *cq_host = (struct cmdq_host *)mmc_cmdq_private(mmc);
+	unsigned int cqcfg;
+	unsigned int tdlba;
+	unsigned int tdlbau;
+	unsigned int rca;
+	int ret;
+
+	cmdq_runtime_pm_get(cq_host);
+	cqcfg = cmdq_readl(cq_host, CQCFG);
+	tdlba = cmdq_readl(cq_host, CQTDLBA);
+	tdlbau = cmdq_readl(cq_host, CQTDLBAU);
+	rca = cmdq_readl(cq_host, CQSSC2);
+
+	cmdq_disable(mmc, true);
+
+	if (cq_host->ops->reset) {
+		ret = cq_host->ops->reset(mmc);
+		if (ret) {
+			pr_crit("%s: reset CMDQ controller: failed\n",
+				mmc_hostname(mmc));
+			BUG();
+		}
+	}
+
+	cmdq_writel(cq_host, tdlba, CQTDLBA);
+	cmdq_writel(cq_host, tdlbau, CQTDLBAU);
+
+	if (cq_host->ops->clear_set_irqs)
+		cq_host->ops->clear_set_irqs(mmc, true);
+
+	cmdq_clear_set_irqs(cq_host, 0x0, CQ_INT_ALL);
+
+	/* cq_host would use this rca to address the card */
+	cmdq_writel(cq_host, rca, CQSSC2);
+
+	/* ensure the writes are done before enabling CQE */
+	mb();
+
+	cmdq_writel(cq_host, cqcfg, CQCFG);
+	cmdq_runtime_pm_put(cq_host);
+	cq_host->enabled = true;
+	mmc_host_clr_cq_disable(mmc);
+}
+
+static void cmdq_prep_task_desc(struct mmc_request *mrq,
+					u64 *data, bool intr, bool qbr)
+{
+	struct mmc_cmdq_req *cmdq_req = mrq->cmdq_req;
+	u32 req_flags = cmdq_req->cmdq_req_flags;
+
+	pr_debug("%s: %s: data-tag: 0x%08x - dir: %d - prio: %d - cnt: 0x%08x -	addr: 0x%llx\n",
+		 mmc_hostname(mrq->host), __func__,
+		 !!(req_flags & DAT_TAG), !!(req_flags & DIR),
+		 !!(req_flags & PRIO), cmdq_req->data.blocks,
+		 (u64)mrq->cmdq_req->blk_addr);
+
+	*data = VALID(1) |
+		END(1) |
+		INT(intr) |
+		ACT(0x5) |
+		FORCED_PROG(!!(req_flags & FORCED_PRG)) |
+		CONTEXT(mrq->cmdq_req->ctx_id) |
+		DATA_TAG(!!(req_flags & DAT_TAG)) |
+		DATA_DIR(!!(req_flags & DIR)) |
+		PRIORITY(!!(req_flags & PRIO)) |
+		QBAR(qbr) |
+		REL_WRITE(!!(req_flags & REL_WR)) |
+		BLK_COUNT(mrq->cmdq_req->data.blocks) |
+		BLK_ADDR((u64)mrq->cmdq_req->blk_addr);
+
+	MMC_TRACE(mrq->host,
+		"%s: Task: 0x%08x | Args: 0x%08x | cnt: 0x%08x\n", __func__,
+		lower_32_bits(*data),
+		upper_32_bits(*data),
+		mrq->cmdq_req->data.blocks);
+}
+
+static int cmdq_dma_map(struct mmc_host *host, struct mmc_request *mrq)
+{
+	int sg_count;
+	struct mmc_data *data = mrq->data;
+
+	if (!data)
+		return -EINVAL;
+
+	sg_count = dma_map_sg(mmc_dev(host), data->sg,
+			      data->sg_len,
+			      (data->flags & MMC_DATA_WRITE) ?
+			      DMA_TO_DEVICE : DMA_FROM_DEVICE);
+	if (!sg_count) {
+		pr_err("%s: sg-len: %d\n", __func__, data->sg_len);
+		return -ENOMEM;
+	}
+
+	return sg_count;
+}
+
+static void cmdq_set_tran_desc(u8 *desc, dma_addr_t addr, int len,
+				bool end, bool is_dma64)
+{
+	__le32 *attr = (__le32 __force *)desc;
+
+	*attr = (VALID(1) |
+		 END(end ? 1 : 0) |
+		 INT(0) |
+		 ACT(0x4) |
+		 DAT_LENGTH(len));
+
+	if (is_dma64) {
+		__le64 *dataddr = (__le64 __force *)(desc + 4);
+
+		dataddr[0] = cpu_to_le64(addr);
+	} else {
+		__le32 *dataddr = (__le32 __force *)(desc + 4);
+
+		dataddr[0] = cpu_to_le32(addr);
+	}
+}
+
+static int cmdq_prep_tran_desc(struct mmc_request *mrq,
+			       struct cmdq_host *cq_host, int tag)
+{
+	struct mmc_data *data = mrq->data;
+	int i, sg_count, len;
+	bool end = false;
+	dma_addr_t addr;
+	u8 *desc;
+	struct scatterlist *sg;
+
+	sg_count = cmdq_dma_map(mrq->host, mrq);
+	if (sg_count < 0) {
+		pr_err("%s: %s: unable to map sg lists, %d\n",
+				mmc_hostname(mrq->host), __func__, sg_count);
+		return sg_count;
+	}
+
+	desc = get_trans_desc(cq_host, tag);
+	memset(desc, 0, cq_host->trans_desc_len * cq_host->mmc->max_segs);
+
+	for_each_sg(data->sg, sg, sg_count, i) {
+		addr = sg_dma_address(sg);
+		len = sg_dma_len(sg);
+
+		if ((i+1) == sg_count)
+			end = true;
+		cmdq_set_tran_desc(desc, addr, len, end, cq_host->dma64);
+		desc += cq_host->trans_desc_len;
+	}
+
+	pr_debug("%s: req: 0x%p tag: %d calc_trans_des: 0x%p sg-cnt: %d\n",
+		__func__, mrq->req, tag, desc, sg_count);
+
+	return 0;
+}
+
+static void cmdq_log_task_desc_history(struct cmdq_host *cq_host, u64 task,
+					bool is_dcmd)
+{
+	if (likely(!cq_host->mmc->cmdq_thist_enabled))
+		return;
+
+	if (!cq_host->thist) {
+		pr_err("%s: %s: CMDQ task history buffer not allocated\n",
+			mmc_hostname(cq_host->mmc), __func__);
+		return;
+	}
+
+	if (cq_host->thist_idx >= cq_host->num_slots)
+		cq_host->thist_idx = 0;
+
+	cq_host->thist[cq_host->thist_idx].is_dcmd = is_dcmd;
+	memcpy(&cq_host->thist[cq_host->thist_idx++].task,
+		&task, cq_host->task_desc_len);
+}
+
+static void cmdq_prep_dcmd_desc(struct mmc_host *mmc,
+				   struct mmc_request *mrq)
+{
+	u64 *task_desc = NULL;
+	u64 data = 0;
+	u8 resp_type;
+	u8 *desc;
+	__le64 *dataddr;
+	struct cmdq_host *cq_host = mmc_cmdq_private(mmc);
+	u8 timing;
+
+	if (!(mrq->cmd->flags & MMC_RSP_PRESENT)) {
+		resp_type = 0x0;
+		timing = 0x1;
+	} else {
+		if (mrq->cmd->flags & MMC_RSP_BUSY) {
+			resp_type = 0x3;
+			timing = 0x0;
+		} else {
+			resp_type = 0x2;
+			timing = 0x1;
+		}
+	}
+
+	task_desc = (__le64 __force *)get_desc(cq_host, cq_host->dcmd_slot);
+	memset(task_desc, 0, cq_host->task_desc_len);
+	data |= (VALID(1) |
+		 END(1) |
+		 INT(1) |
+		 QBAR(1) |
+		 ACT(0x5) |
+		 CMD_INDEX(mrq->cmd->opcode) |
+		 CMD_TIMING(timing) | RESP_TYPE(resp_type));
+	*task_desc |= data;
+	desc = (u8 *)task_desc;
+	pr_debug("cmdq: dcmd: cmd: %d timing: %d resp: %d\n",
+		mrq->cmd->opcode, timing, resp_type);
+	dataddr = (__le64 __force *)(desc + 4);
+	dataddr[0] = cpu_to_le64((u64)mrq->cmd->arg);
+	cmdq_log_task_desc_history(cq_host, *task_desc, true);
+	MMC_TRACE(mrq->host,
+		"%s: DCMD: Task: 0x%08x | Args: 0x%08x\n",
+		__func__,
+		lower_32_bits(*task_desc),
+		upper_32_bits(*task_desc));
+}
+
+static void cmdq_pm_qos_vote(struct sdhci_host *host, struct mmc_request *mrq)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+
+	sdhci_msm_pm_qos_cpu_vote(host,
+		msm_host->pdata->pm_qos_data.cmdq_latency, mrq->req->cpu);
+}
+
+static void cmdq_pm_qos_unvote(struct sdhci_host *host, struct mmc_request *mrq)
+{
+	/* use async as we're inside an atomic context (soft-irq) */
+	sdhci_msm_pm_qos_cpu_unvote(host, mrq->req->cpu, true);
+}
+
+static int cmdq_request(struct mmc_host *mmc, struct mmc_request *mrq)
+{
+	int err = 0;
+	u64 data = 0;
+	u64 *task_desc = NULL;
+	u32 tag = mrq->cmdq_req->tag;
+	struct cmdq_host *cq_host = (struct cmdq_host *)mmc_cmdq_private(mmc);
+	struct sdhci_host *host = mmc_priv(mmc);
+
+	if (!cq_host->enabled) {
+		pr_err("%s: CMDQ host not enabled yet !!!\n",
+		       mmc_hostname(mmc));
+		err = -EINVAL;
+		goto out;
+	}
+
+	cmdq_runtime_pm_get(cq_host);
+
+	if (mrq->cmdq_req->cmdq_req_flags & DCMD) {
+		cmdq_prep_dcmd_desc(mmc, mrq);
+		cq_host->mrq_slot[DCMD_SLOT] = mrq;
+		/* DCMD's are always issued on a fixed slot */
+		tag = DCMD_SLOT;
+		goto ring_doorbell;
+	}
+
+	task_desc = (__le64 __force *)get_desc(cq_host, tag);
+
+	cmdq_prep_task_desc(mrq, &data, 1,
+			    (mrq->cmdq_req->cmdq_req_flags & QBR));
+	*task_desc = cpu_to_le64(data);
+	cmdq_log_task_desc_history(cq_host, *task_desc, false);
+
+	err = cmdq_prep_tran_desc(mrq, cq_host, tag);
+	if (err) {
+		pr_err("%s: %s: failed to setup tx desc: %d\n",
+		       mmc_hostname(mmc), __func__, err);
+		goto out;
+	}
+
+	cq_host->mrq_slot[tag] = mrq;
+
+	/* PM QoS */
+	sdhci_msm_pm_qos_irq_vote(host);
+	cmdq_pm_qos_vote(host, mrq);
+ring_doorbell:
+	/* Ensure the task descriptor list is flushed before ringing doorbell */
+	wmb();
+	if (cmdq_readl(cq_host, CQTDBR) & (1 << tag)) {
+		cmdq_dumpregs(cq_host);
+		BUG_ON(1);
+	}
+	MMC_TRACE(mmc, "%s: tag: %d\n", __func__, tag);
+	cmdq_writel(cq_host, 1 << tag, CQTDBR);
+	/* Commit the doorbell write immediately */
+	wmb();
+
+out:
+	return err;
+}
+
+static void cmdq_finish_data(struct mmc_host *mmc, unsigned int tag)
+{
+	struct mmc_request *mrq;
+	struct cmdq_host *cq_host = (struct cmdq_host *)mmc_cmdq_private(mmc);
+
+	mrq = get_req_by_tag(cq_host, tag);
+	if (tag == cq_host->dcmd_slot)
+		mrq->cmd->resp[0] = cmdq_readl(cq_host, CQCRDCT);
+
+	if (mrq->cmdq_req->cmdq_req_flags & DCMD)
+		cmdq_writel(cq_host, cmdq_readl(cq_host, CQ_VENDOR_CFG) |
+			    CMDQ_SEND_STATUS_TRIGGER, CQ_VENDOR_CFG);
+
+	cmdq_runtime_pm_put(cq_host);
+	mrq->done(mrq);
+}
+
+irqreturn_t cmdq_irq(struct mmc_host *mmc, int err)
+{
+	u32 status;
+	unsigned long tag = 0, comp_status;
+	struct cmdq_host *cq_host = (struct cmdq_host *)mmc_cmdq_private(mmc);
+	unsigned long err_info = 0;
+	struct mmc_request *mrq;
+	int ret;
+	u32 dbr_set = 0;
+
+	status = cmdq_readl(cq_host, CQIS);
+
+	if (!status && !err)
+		return IRQ_NONE;
+	MMC_TRACE(mmc, "%s: CQIS: 0x%x err: %d\n",
+		__func__, status, err);
+
+	if (err || (status & CQIS_RED)) {
+		err_info = cmdq_readl(cq_host, CQTERRI);
+		pr_err("%s: err: %d status: 0x%08x task-err-info (0x%08lx)\n",
+		       mmc_hostname(mmc), err, status, err_info);
+
+		/*
+		 * Need to halt CQE in case of error in interrupt context itself
+		 * otherwise CQE may proceed with sending CMD to device even if
+		 * CQE/card is in error state.
+		 * CMDQ error handling will make sure that it is unhalted after
+		 * handling all the errors.
+		 */
+		ret = cmdq_halt_poll(mmc, true);
+		if (ret)
+			pr_err("%s: %s: halt failed ret=%d\n",
+					mmc_hostname(mmc), __func__, ret);
+
+		/*
+		 * Clear the CQIS after halting incase of error. This is done
+		 * because if CQIS is cleared before halting, the CQ will
+		 * continue with issueing commands for rest of requests with
+		 * Doorbell rung. This will overwrite the Resp Arg register.
+		 * So CQ must be halted first and then CQIS cleared incase
+		 * of error
+		 */
+		cmdq_writel(cq_host, status, CQIS);
+
+		cmdq_dumpregs(cq_host);
+
+		if (!err_info) {
+			/*
+			 * It may so happen sometimes for few errors(like ADMA)
+			 * that HW cannot give CQTERRI info.
+			 * Thus below is a HW WA for recovering from such
+			 * scenario.
+			 * - To halt/disable CQE and do reset_all.
+			 *   Since there is no way to know which tag would
+			 *   have caused such error, so check for any first
+			 *   bit set in doorbell and proceed with an error.
+			 */
+			dbr_set = cmdq_readl(cq_host, CQTDBR);
+			if (!dbr_set) {
+				pr_err("%s: spurious/force error interrupt\n",
+						mmc_hostname(mmc));
+				cmdq_halt_poll(mmc, false);
+				mmc_host_clr_halt(mmc);
+				return IRQ_HANDLED;
+			}
+
+			tag = ffs(dbr_set) - 1;
+			pr_err("%s: error tag selected: tag = %lu\n",
+					mmc_hostname(mmc), tag);
+			mrq = get_req_by_tag(cq_host, tag);
+			if (mrq->data)
+				mrq->data->error = err;
+			else
+				mrq->cmd->error = err;
+			/*
+			 * Get ADMA descriptor memory in case of ADMA
+			 * error for debug.
+			 */
+			if (err == -EIO)
+				cmdq_dump_adma_mem(cq_host);
+			goto skip_cqterri;
+		}
+
+		if (err_info & CQ_RMEFV) {
+			tag = GET_CMD_ERR_TAG(err_info);
+			pr_err("%s: CMD err tag: %lu\n", __func__, tag);
+
+			mrq = get_req_by_tag(cq_host, tag);
+			/* CMD44/45/46/47 will not have a valid cmd */
+			if (mrq->cmd)
+				mrq->cmd->error = err;
+			else
+				mrq->data->error = err;
+		} else {
+			tag = GET_DAT_ERR_TAG(err_info);
+			pr_err("%s: Dat err  tag: %lu\n", __func__, tag);
+			mrq = get_req_by_tag(cq_host, tag);
+			mrq->data->error = err;
+		}
+
+skip_cqterri:
+		/*
+		 * If CQE halt fails then, disable CQE
+		 * from processing any further requests
+		 */
+		if (ret) {
+			cmdq_disable_nosync(mmc, true);
+			/*
+			 * Enable legacy interrupts as CQE halt has failed.
+			 * This is needed to send legacy commands like status
+			 * cmd as part of error handling work.
+			 */
+			if (cq_host->ops->clear_set_irqs)
+				cq_host->ops->clear_set_irqs(mmc, false);
+		}
+
+		/*
+		 * CQE detected a response error from device
+		 * In most cases, this would require a reset.
+		 */
+		if (status & CQIS_RED) {
+			/*
+			 * will check if the RED error is due to a bkops
+			 * exception once the queue is empty
+			 */
+			BUG_ON(!mmc->card);
+			if (mmc_card_configured_manual_bkops(mmc->card) ||
+			    mmc_card_configured_auto_bkops(mmc->card))
+				mmc->card->bkops.needs_check = true;
+
+			mrq->cmdq_req->resp_err = true;
+			pr_err("%s: Response error (0x%08x) from card !!!",
+				mmc_hostname(mmc), cmdq_readl(cq_host, CQCRA));
+
+		} else {
+			mrq->cmdq_req->resp_idx = cmdq_readl(cq_host, CQCRI);
+			mrq->cmdq_req->resp_arg = cmdq_readl(cq_host, CQCRA);
+		}
+
+		cmdq_finish_data(mmc, tag);
+	} else {
+		cmdq_writel(cq_host, status, CQIS);
+	}
+
+	if (status & CQIS_TCC) {
+		/* read CQTCN and complete the request */
+		comp_status = cmdq_readl(cq_host, CQTCN);
+		if (!comp_status)
+			goto out;
+		/*
+		 * The CQTCN must be cleared before notifying req completion
+		 * to upper layers to avoid missing completion notification
+		 * of new requests with the same tag.
+		 */
+		cmdq_writel(cq_host, comp_status, CQTCN);
+		/*
+		 * A write memory barrier is necessary to guarantee that CQTCN
+		 * gets cleared first before next doorbell for the same tag is
+		 * set but that is already achieved by the barrier present
+		 * before setting doorbell, hence one is not needed here.
+		 */
+		for_each_set_bit(tag, &comp_status, cq_host->num_slots) {
+			/* complete the corresponding mrq */
+			pr_debug("%s: completing tag -> %lu\n",
+				 mmc_hostname(mmc), tag);
+			MMC_TRACE(mmc, "%s: completing tag -> %lu\n",
+				__func__, tag);
+				cmdq_finish_data(mmc, tag);
+		}
+	}
+
+	if (status & CQIS_HAC) {
+		if (cq_host->ops->post_cqe_halt)
+			cq_host->ops->post_cqe_halt(mmc);
+		/* halt done: re-enable legacy interrupts */
+		if (cq_host->ops->clear_set_irqs)
+			cq_host->ops->clear_set_irqs(mmc, false);
+		/* halt is completed, wakeup waiting thread */
+		complete(&cq_host->halt_comp);
+	}
+
+out:
+	return IRQ_HANDLED;
+}
+EXPORT_SYMBOL(cmdq_irq);
+
+/* cmdq_halt_poll - Halting CQE using polling method.
+ * @mmc: struct mmc_host
+ * @halt: bool halt
+ * This is used mainly from interrupt context to halt/unhalt
+ * CQE engine.
+ */
+static int cmdq_halt_poll(struct mmc_host *mmc, bool halt)
+{
+	struct cmdq_host *cq_host = (struct cmdq_host *)mmc_cmdq_private(mmc);
+	int retries = 100;
+
+	if (!halt) {
+		if (cq_host->ops->set_data_timeout)
+			cq_host->ops->set_data_timeout(mmc, 0xf);
+		if (cq_host->ops->clear_set_irqs)
+			cq_host->ops->clear_set_irqs(mmc, true);
+		cmdq_writel(cq_host, cmdq_readl(cq_host, CQCTL) & ~HALT,
+			    CQCTL);
+		return 0;
+	}
+
+	cmdq_set_halt_irq(cq_host, false);
+	cmdq_writel(cq_host, cmdq_readl(cq_host, CQCTL) | HALT, CQCTL);
+	while (retries) {
+		if (!(cmdq_readl(cq_host, CQCTL) & HALT)) {
+			udelay(5);
+			retries--;
+			continue;
+		} else {
+			if (cq_host->ops->post_cqe_halt)
+				cq_host->ops->post_cqe_halt(mmc);
+			/* halt done: re-enable legacy interrupts */
+			if (cq_host->ops->clear_set_irqs)
+				cq_host->ops->clear_set_irqs(mmc,
+							false);
+			mmc_host_set_halt(mmc);
+			break;
+		}
+	}
+	cmdq_set_halt_irq(cq_host, true);
+	return retries ? 0 : -ETIMEDOUT;
+}
+
+/* May sleep */
+static int cmdq_halt(struct mmc_host *mmc, bool halt)
+{
+	struct cmdq_host *cq_host = (struct cmdq_host *)mmc_cmdq_private(mmc);
+	u32 ret = 0;
+	u32 config = 0;
+	int retries = 3;
+
+	cmdq_runtime_pm_get(cq_host);
+	if (halt) {
+		while (retries) {
+			cmdq_writel(cq_host, cmdq_readl(cq_host, CQCTL) | HALT,
+				    CQCTL);
+			ret = wait_for_completion_timeout(&cq_host->halt_comp,
+					  msecs_to_jiffies(HALT_TIMEOUT_MS));
+			if (!ret) {
+				pr_warn("%s: %s: HAC int timeout\n",
+					mmc_hostname(mmc), __func__);
+				if ((cmdq_readl(cq_host, CQCTL) & HALT)) {
+					/*
+					 * Don't retry if CQE is halted but irq
+					 * is not triggered in timeout period.
+					 * And since we are returning error,
+					 * un-halt CQE. Since irq was not fired
+					 * yet, no need to set other params
+					 */
+					retries = 0;
+					config = cmdq_readl(cq_host, CQCTL);
+					config &= ~HALT;
+					cmdq_writel(cq_host, config, CQCTL);
+				} else {
+					pr_warn("%s: %s: retryng halt (%d)\n",
+						mmc_hostname(mmc), __func__,
+						retries);
+					retries--;
+					continue;
+				}
+			} else {
+				MMC_TRACE(mmc, "%s: halt done , retries: %d\n",
+					__func__, retries);
+				break;
+			}
+		}
+		ret = retries ? 0 : -ETIMEDOUT;
+	} else {
+		if (cq_host->ops->set_transfer_params)
+			cq_host->ops->set_transfer_params(mmc);
+		if (cq_host->ops->set_block_size)
+			cq_host->ops->set_block_size(mmc);
+		if (cq_host->ops->set_data_timeout)
+			cq_host->ops->set_data_timeout(mmc, 0xf);
+		if (cq_host->ops->clear_set_irqs)
+			cq_host->ops->clear_set_irqs(mmc, true);
+		MMC_TRACE(mmc, "%s: unhalt done\n", __func__);
+		cmdq_writel(cq_host, cmdq_readl(cq_host, CQCTL) & ~HALT,
+			    CQCTL);
+	}
+	cmdq_runtime_pm_put(cq_host);
+	return ret;
+}
+
+static void cmdq_post_req(struct mmc_host *mmc, int tag, int err)
+{
+	struct cmdq_host *cq_host;
+	struct mmc_request *mrq;
+	struct mmc_data *data;
+	struct sdhci_host *sdhci_host = mmc_priv(mmc);
+
+	if (WARN_ON(!mmc))
+		return;
+
+	cq_host = (struct cmdq_host *)mmc_cmdq_private(mmc);
+	mrq = get_req_by_tag(cq_host, tag);
+	data = mrq->data;
+
+	if (data) {
+		data->error = err;
+		dma_unmap_sg(mmc_dev(mmc), data->sg, data->sg_len,
+			     (data->flags & MMC_DATA_READ) ?
+			     DMA_FROM_DEVICE : DMA_TO_DEVICE);
+		if (err)
+			data->bytes_xfered = 0;
+		else
+			data->bytes_xfered = blk_rq_bytes(mrq->req);
+
+		/* we're in atomic context (soft-irq) so unvote async. */
+		sdhci_msm_pm_qos_irq_unvote(sdhci_host, true);
+		cmdq_pm_qos_unvote(sdhci_host, mrq);
+	}
+}
+
+static void cmdq_dumpstate(struct mmc_host *mmc)
+{
+	struct cmdq_host *cq_host = (struct cmdq_host *)mmc_cmdq_private(mmc);
+	cmdq_runtime_pm_get(cq_host);
+	cmdq_dumpregs(cq_host);
+	cmdq_runtime_pm_put(cq_host);
+}
+
+static int cmdq_late_init(struct mmc_host *mmc)
+{
+	struct sdhci_host *host = mmc_priv(mmc);
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+
+	/*
+	 * TODO: This should basically move to something like "sdhci-cmdq-msm"
+	 * for msm specific implementation.
+	 */
+	sdhci_msm_pm_qos_irq_init(host);
+
+	if (msm_host->pdata->pm_qos_data.cmdq_valid)
+		sdhci_msm_pm_qos_cpu_init(host,
+			msm_host->pdata->pm_qos_data.cmdq_latency);
+	return 0;
+}
+
+static const struct mmc_cmdq_host_ops cmdq_host_ops = {
+	.init = cmdq_late_init,
+	.enable = cmdq_enable,
+	.disable = cmdq_disable,
+	.request = cmdq_request,
+	.post_req = cmdq_post_req,
+	.halt = cmdq_halt,
+	.reset	= cmdq_reset,
+	.dumpstate = cmdq_dumpstate,
+};
+
+struct cmdq_host *cmdq_pltfm_init(struct platform_device *pdev)
+{
+	struct cmdq_host *cq_host;
+	struct resource *cmdq_memres = NULL;
+
+	/* check and setup CMDQ interface */
+	cmdq_memres = platform_get_resource_byname(pdev, IORESOURCE_MEM,
+						   "cmdq_mem");
+	if (!cmdq_memres) {
+		dev_dbg(&pdev->dev, "CMDQ not supported\n");
+		return ERR_PTR(-EINVAL);
+	}
+
+	cq_host = kzalloc(sizeof(*cq_host), GFP_KERNEL);
+	if (!cq_host) {
+		dev_err(&pdev->dev, "failed to allocate memory for CMDQ\n");
+		return ERR_PTR(-ENOMEM);
+	}
+	cq_host->mmio = devm_ioremap(&pdev->dev,
+				     cmdq_memres->start,
+				     resource_size(cmdq_memres));
+	if (!cq_host->mmio) {
+		dev_err(&pdev->dev, "failed to remap cmdq regs\n");
+		kfree(cq_host);
+		return ERR_PTR(-EBUSY);
+	}
+	dev_dbg(&pdev->dev, "CMDQ ioremap: done\n");
+
+	return cq_host;
+}
+EXPORT_SYMBOL(cmdq_pltfm_init);
+
+int cmdq_init(struct cmdq_host *cq_host, struct mmc_host *mmc,
+	      bool dma64)
+{
+	int err = 0;
+
+	cq_host->dma64 = dma64;
+	cq_host->mmc = mmc;
+	cq_host->mmc->cmdq_private = cq_host;
+
+	cq_host->num_slots = NUM_SLOTS;
+	cq_host->dcmd_slot = DCMD_SLOT;
+
+	mmc->cmdq_ops = &cmdq_host_ops;
+	mmc->num_cq_slots = NUM_SLOTS;
+	mmc->dcmd_cq_slot = DCMD_SLOT;
+
+	cq_host->mrq_slot = kzalloc(sizeof(cq_host->mrq_slot) *
+				    cq_host->num_slots, GFP_KERNEL);
+	if (!cq_host->mrq_slot)
+		return -ENOMEM;
+
+	init_completion(&cq_host->halt_comp);
+	return err;
+}
+EXPORT_SYMBOL(cmdq_init);
diff --git a/drivers/mmc/host/cmdq_hci.h b/drivers/mmc/host/cmdq_hci.h
new file mode 100644
index 0000000..5347b3ab
--- /dev/null
+++ b/drivers/mmc/host/cmdq_hci.h
@@ -0,0 +1,233 @@
+/* Copyright (c) 2015-2016, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+#ifndef LINUX_MMC_CQ_HCI_H
+#define LINUX_MMC_CQ_HCI_H
+#include <linux/mmc/core.h>
+
+/* registers */
+/* version */
+#define CQVER		0x00
+/* capabilities */
+#define CQCAP		0x04
+/* configuration */
+#define CQCFG		0x08
+#define CQ_DCMD		0x00001000
+#define CQ_TASK_DESC_SZ 0x00000100
+#define CQ_ENABLE	0x00000001
+
+/* control */
+#define CQCTL		0x0C
+#define CLEAR_ALL_TASKS 0x00000100
+#define HALT		0x00000001
+
+/* interrupt status */
+#define CQIS		0x10
+#define CQIS_HAC	(1 << 0)
+#define CQIS_TCC	(1 << 1)
+#define CQIS_RED	(1 << 2)
+#define CQIS_TCL	(1 << 3)
+
+/* interrupt status enable */
+#define CQISTE		0x14
+
+/* interrupt signal enable */
+#define CQISGE		0x18
+
+/* interrupt coalescing */
+#define CQIC		0x1C
+#define CQIC_ENABLE	(1 << 31)
+#define CQIC_RESET	(1 << 16)
+#define CQIC_ICCTHWEN	(1 << 15)
+#define CQIC_ICCTH(x)	((x & 0x1F) << 8)
+#define CQIC_ICTOVALWEN (1 << 7)
+#define CQIC_ICTOVAL(x) (x & 0x7F)
+
+/* task list base address */
+#define CQTDLBA		0x20
+
+/* task list base address upper */
+#define CQTDLBAU	0x24
+
+/* door-bell */
+#define CQTDBR		0x28
+
+/* task completion notification */
+#define CQTCN		0x2C
+
+/* device queue status */
+#define CQDQS		0x30
+
+/* device pending tasks */
+#define CQDPT		0x34
+
+/* task clear */
+#define CQTCLR		0x38
+
+/* send status config 1 */
+#define CQSSC1		0x40
+/*
+ * Value n means CQE would send CMD13 during the transfer of data block
+ * BLOCK_CNT-n
+ */
+#define SEND_QSR_INTERVAL 0x70001
+
+/* send status config 2 */
+#define CQSSC2		0x44
+
+/* response for dcmd */
+#define CQCRDCT		0x48
+
+/* response mode error mask */
+#define CQRMEM		0x50
+#define CQ_EXCEPTION	(1 << 6)
+
+/* task error info */
+#define CQTERRI		0x54
+
+/* CQTERRI bit fields */
+#define CQ_RMECI	0x1F
+#define CQ_RMETI	(0x1F << 8)
+#define CQ_RMEFV	(1 << 15)
+#define CQ_DTECI	(0x3F << 16)
+#define CQ_DTETI	(0x1F << 24)
+#define CQ_DTEFV	(1 << 31)
+
+#define GET_CMD_ERR_TAG(__r__) ((__r__ & CQ_RMETI) >> 8)
+#define GET_DAT_ERR_TAG(__r__) ((__r__ & CQ_DTETI) >> 24)
+
+/* command response index */
+#define CQCRI		0x58
+
+/* command response argument */
+#define CQCRA		0x5C
+
+#define CQ_INT_ALL	0xF
+#define CQIC_DEFAULT_ICCTH 31
+#define CQIC_DEFAULT_ICTOVAL 1
+
+/* attribute fields */
+#define VALID(x)	((x & 1) << 0)
+#define END(x)		((x & 1) << 1)
+#define INT(x)		((x & 1) << 2)
+#define ACT(x)		((x & 0x7) << 3)
+
+/* data command task descriptor fields */
+#define FORCED_PROG(x)	((x & 1) << 6)
+#define CONTEXT(x)	((x & 0xF) << 7)
+#define DATA_TAG(x)	((x & 1) << 11)
+#define DATA_DIR(x)	((x & 1) << 12)
+#define PRIORITY(x)	((x & 1) << 13)
+#define QBAR(x)		((x & 1) << 14)
+#define REL_WRITE(x)	((x & 1) << 15)
+#define BLK_COUNT(x)	((x & 0xFFFF) << 16)
+#define BLK_ADDR(x)	((x & 0xFFFFFFFF) << 32)
+
+/* direct command task descriptor fields */
+#define CMD_INDEX(x)	((x & 0x3F) << 16)
+#define CMD_TIMING(x)	((x & 1) << 22)
+#define RESP_TYPE(x)	((x & 0x3) << 23)
+
+/* transfer descriptor fields */
+#define DAT_LENGTH(x)	((x & 0xFFFF) << 16)
+#define DAT_ADDR_LO(x)	((x & 0xFFFFFFFF) << 32)
+#define DAT_ADDR_HI(x)	((x & 0xFFFFFFFF) << 0)
+
+#define CQ_VENDOR_CFG	0x100
+#define CMDQ_SEND_STATUS_TRIGGER (1 << 31)
+
+struct task_history {
+	u64 task;
+	bool is_dcmd;
+};
+
+struct cmdq_host {
+	const struct cmdq_host_ops *ops;
+	void __iomem *mmio;
+	struct mmc_host *mmc;
+
+	/* 64 bit DMA */
+	bool dma64;
+	int num_slots;
+
+	u32 dcmd_slot;
+	u32 caps;
+#define CMDQ_TASK_DESC_SZ_128 0x1
+
+	u32 quirks;
+#define CMDQ_QUIRK_SHORT_TXFR_DESC_SZ 0x1
+#define CMDQ_QUIRK_NO_DCMD	0x2
+
+	bool enabled;
+	bool halted;
+	bool init_done;
+
+	u8 *desc_base;
+
+	/* total descriptor size */
+	u8 slot_sz;
+
+	/* 64/128 bit depends on CQCFG */
+	u8 task_desc_len;
+
+	/* 64 bit on 32-bit arch, 128 bit on 64-bit */
+	u8 link_desc_len;
+
+	u8 *trans_desc_base;
+	/* same length as transfer descriptor */
+	u8 trans_desc_len;
+
+	dma_addr_t desc_dma_base;
+	dma_addr_t trans_desc_dma_base;
+
+	struct task_history *thist;
+	u8 thist_idx;
+
+	struct completion halt_comp;
+	struct mmc_request **mrq_slot;
+	void *private;
+};
+
+struct cmdq_host_ops {
+	void (*set_transfer_params)(struct mmc_host *mmc);
+	void (*set_data_timeout)(struct mmc_host *mmc, u32 val);
+	void (*clear_set_irqs)(struct mmc_host *mmc, bool clear);
+	void (*set_block_size)(struct mmc_host *mmc);
+	void (*dump_vendor_regs)(struct mmc_host *mmc);
+	void (*write_l)(struct cmdq_host *host, u32 val, int reg);
+	u32 (*read_l)(struct cmdq_host *host, int reg);
+	void (*clear_set_dumpregs)(struct mmc_host *mmc, bool set);
+	void (*enhanced_strobe_mask)(struct mmc_host *mmc, bool set);
+	int (*reset)(struct mmc_host *mmc);
+	void (*post_cqe_halt)(struct mmc_host *mmc);
+};
+
+static inline void cmdq_writel(struct cmdq_host *host, u32 val, int reg)
+{
+	if (unlikely(host->ops && host->ops->write_l))
+		host->ops->write_l(host, val, reg);
+	else
+		writel_relaxed(val, host->mmio + reg);
+}
+
+static inline u32 cmdq_readl(struct cmdq_host *host, int reg)
+{
+	if (unlikely(host->ops && host->ops->read_l))
+		return host->ops->read_l(host, reg);
+	else
+		return readl_relaxed(host->mmio + reg);
+}
+
+extern irqreturn_t cmdq_irq(struct mmc_host *mmc, int err);
+extern int cmdq_init(struct cmdq_host *cq_host, struct mmc_host *mmc,
+		     bool dma64);
+extern struct cmdq_host *cmdq_pltfm_init(struct platform_device *pdev);
+#endif
diff --git a/drivers/mmc/host/sdhci-msm-ice.c b/drivers/mmc/host/sdhci-msm-ice.c
new file mode 100644
index 0000000..ba6e51c
--- /dev/null
+++ b/drivers/mmc/host/sdhci-msm-ice.c
@@ -0,0 +1,444 @@
+/*
+ * Copyright (c) 2015, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include "sdhci-msm-ice.h"
+
+static void sdhci_msm_ice_success_cb(void *host_ctrl,
+				enum ice_event_completion evt)
+{
+	struct sdhci_msm_host *msm_host = (struct sdhci_msm_host *)host_ctrl;
+
+	if ((msm_host->ice.state == SDHCI_MSM_ICE_STATE_DISABLED &&
+	    evt == ICE_INIT_COMPLETION) || (msm_host->ice.state ==
+	    SDHCI_MSM_ICE_STATE_SUSPENDED && evt == ICE_RESUME_COMPLETION))
+		msm_host->ice.state = SDHCI_MSM_ICE_STATE_ACTIVE;
+
+	complete(&msm_host->ice.async_done);
+}
+
+static void sdhci_msm_ice_error_cb(void *host_ctrl, u32 error)
+{
+	struct sdhci_msm_host *msm_host = (struct sdhci_msm_host *)host_ctrl;
+
+	dev_err(&msm_host->pdev->dev, "%s: Error in ice operation 0x%x",
+		__func__, error);
+
+	if (msm_host->ice.state == SDHCI_MSM_ICE_STATE_ACTIVE)
+		msm_host->ice.state = SDHCI_MSM_ICE_STATE_DISABLED;
+
+	complete(&msm_host->ice.async_done);
+}
+
+static struct platform_device *sdhci_msm_ice_get_pdevice(struct device *dev)
+{
+	struct device_node *node;
+	struct platform_device *ice_pdev = NULL;
+
+	node = of_parse_phandle(dev->of_node, SDHC_MSM_CRYPTO_LABEL, 0);
+	if (!node) {
+		dev_dbg(dev, "%s: sdhc-msm-crypto property not specified\n",
+			__func__);
+		goto out;
+	}
+	ice_pdev = qcom_ice_get_pdevice(node);
+out:
+	return ice_pdev;
+}
+
+static
+struct qcom_ice_variant_ops *sdhci_msm_ice_get_vops(struct device *dev)
+{
+	struct qcom_ice_variant_ops *ice_vops = NULL;
+	struct device_node *node;
+
+	node = of_parse_phandle(dev->of_node, SDHC_MSM_CRYPTO_LABEL, 0);
+	if (!node) {
+		dev_dbg(dev, "%s: sdhc-msm-crypto property not specified\n",
+			__func__);
+		goto out;
+	}
+	ice_vops = qcom_ice_get_variant_ops(node);
+	of_node_put(node);
+out:
+	return ice_vops;
+}
+
+static
+void sdhci_msm_enable_ice_hci(struct sdhci_host *host, bool enable)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	u32 config = 0;
+	u32 ice_cap = 0;
+
+	/*
+	 * Enable the cryptographic support inside SDHC.
+	 * This is a global config which needs to be enabled
+	 * all the time.
+	 * Only when it it is enabled, the ICE_HCI capability
+	 * will get reflected in CQCAP register.
+	 */
+	config = readl_relaxed(host->ioaddr + HC_VENDOR_SPECIFIC_FUNC4);
+
+	if (enable)
+		config &= ~DISABLE_CRYPTO;
+	else
+		config |= DISABLE_CRYPTO;
+	writel_relaxed(config, host->ioaddr + HC_VENDOR_SPECIFIC_FUNC4);
+
+	/*
+	 * CQCAP register is in different register space from above
+	 * ice global enable register. So a mb() is required to ensure
+	 * above write gets completed before reading the CQCAP register.
+	 */
+	mb();
+
+	/*
+	 * Check if ICE HCI capability support is present
+	 * If present, enable it.
+	 */
+	ice_cap = readl_relaxed(msm_host->cryptoio + ICE_CQ_CAPABILITIES);
+	if (ice_cap & ICE_HCI_SUPPORT) {
+		config = readl_relaxed(msm_host->cryptoio + ICE_CQ_CONFIG);
+
+		if (enable)
+			config |= CRYPTO_GENERAL_ENABLE;
+		else
+			config &= ~CRYPTO_GENERAL_ENABLE;
+		writel_relaxed(config, msm_host->cryptoio + ICE_CQ_CONFIG);
+	}
+}
+
+int sdhci_msm_ice_get_dev(struct sdhci_host *host)
+{
+	struct device *sdhc_dev;
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+
+	if (!msm_host || !msm_host->pdev) {
+		pr_err("%s: invalid msm_host %p or msm_host->pdev\n",
+			__func__, msm_host);
+		return -EINVAL;
+	}
+
+	sdhc_dev = &msm_host->pdev->dev;
+	msm_host->ice.vops  = sdhci_msm_ice_get_vops(sdhc_dev);
+	msm_host->ice.pdev = sdhci_msm_ice_get_pdevice(sdhc_dev);
+
+	if (msm_host->ice.pdev == ERR_PTR(-EPROBE_DEFER)) {
+		dev_err(sdhc_dev, "%s: ICE device not probed yet\n",
+			__func__);
+		msm_host->ice.pdev = NULL;
+		msm_host->ice.vops = NULL;
+		return -EPROBE_DEFER;
+	}
+
+	if (!msm_host->ice.pdev) {
+		dev_dbg(sdhc_dev, "%s: invalid platform device\n", __func__);
+		msm_host->ice.vops = NULL;
+		return -ENODEV;
+	}
+	if (!msm_host->ice.vops) {
+		dev_dbg(sdhc_dev, "%s: invalid ice vops\n", __func__);
+		msm_host->ice.pdev = NULL;
+		return -ENODEV;
+	}
+	msm_host->ice.state = SDHCI_MSM_ICE_STATE_DISABLED;
+	return 0;
+}
+
+static
+int sdhci_msm_ice_pltfm_init(struct sdhci_msm_host *msm_host)
+{
+	struct resource *ice_memres = NULL;
+	struct platform_device *pdev = msm_host->pdev;
+	int err = 0;
+
+	if (!msm_host->ice_hci_support)
+		goto out;
+	/*
+	 * ICE HCI registers are present in cmdq register space.
+	 * So map the cmdq mem for accessing ICE HCI registers.
+	 */
+	ice_memres = platform_get_resource_byname(pdev,
+						IORESOURCE_MEM, "cmdq_mem");
+	if (!ice_memres) {
+		dev_err(&pdev->dev, "Failed to get iomem resource for ice\n");
+		err = -EINVAL;
+		goto out;
+	}
+	msm_host->cryptoio = devm_ioremap(&pdev->dev,
+					ice_memres->start,
+					resource_size(ice_memres));
+	if (!msm_host->cryptoio) {
+		dev_err(&pdev->dev, "Failed to remap registers\n");
+		err = -ENOMEM;
+	}
+out:
+	return err;
+}
+
+int sdhci_msm_ice_init(struct sdhci_host *host)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	int err = 0;
+
+	init_completion(&msm_host->ice.async_done);
+	if (msm_host->ice.vops->config) {
+		err = msm_host->ice.vops->init(msm_host->ice.pdev,
+					msm_host,
+					sdhci_msm_ice_success_cb,
+					sdhci_msm_ice_error_cb);
+		if (err) {
+			pr_err("%s: ice init err %d\n",
+				mmc_hostname(host->mmc), err);
+			return err;
+		}
+	}
+
+	if (!wait_for_completion_timeout(&msm_host->ice.async_done,
+		msecs_to_jiffies(SDHCI_MSM_ICE_COMPLETION_TIMEOUT_MS))) {
+		pr_err("%s: ice init timedout after %d ms\n",
+				mmc_hostname(host->mmc),
+				SDHCI_MSM_ICE_COMPLETION_TIMEOUT_MS);
+		sdhci_msm_ice_print_regs(host);
+		return -ETIMEDOUT;
+	}
+
+	if (msm_host->ice.state != SDHCI_MSM_ICE_STATE_ACTIVE) {
+		pr_err("%s: ice is in invalid state %d\n",
+			mmc_hostname(host->mmc), msm_host->ice.state);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+int sdhci_msm_ice_cfg(struct sdhci_host *host, struct mmc_request *mrq,
+			u32 slot)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	int err = 0;
+	struct ice_data_setting ice_set;
+	sector_t lba = 0;
+	unsigned int ctrl_info_val = 0;
+	unsigned int bypass = SDHCI_MSM_ICE_ENABLE_BYPASS;
+	struct request *req;
+
+	if (msm_host->ice.state != SDHCI_MSM_ICE_STATE_ACTIVE) {
+		pr_err("%s: ice is in invalid state %d\n",
+			mmc_hostname(host->mmc), msm_host->ice.state);
+		return -EINVAL;
+	}
+
+	BUG_ON(!mrq);
+	memset(&ice_set, 0, sizeof(struct ice_data_setting));
+	req = mrq->req;
+	if (req) {
+		lba = req->__sector;
+		if (msm_host->ice.vops->config) {
+			err = msm_host->ice.vops->config(msm_host->ice.pdev,
+							req, &ice_set);
+			if (err) {
+				pr_err("%s: ice config failed %d\n",
+						mmc_hostname(host->mmc), err);
+				return err;
+			}
+		}
+		/* if writing data command */
+		if (rq_data_dir(req) == WRITE)
+			bypass = ice_set.encr_bypass ?
+					SDHCI_MSM_ICE_ENABLE_BYPASS :
+					SDHCI_MSM_ICE_DISABLE_BYPASS;
+		/* if reading data command */
+		else if (rq_data_dir(req) == READ)
+			bypass = ice_set.decr_bypass ?
+					SDHCI_MSM_ICE_ENABLE_BYPASS :
+					SDHCI_MSM_ICE_DISABLE_BYPASS;
+		pr_debug("%s: %s: slot %d encr_bypass %d bypass %d decr_bypass %d key_index %d\n",
+				mmc_hostname(host->mmc),
+				(rq_data_dir(req) == WRITE) ? "WRITE" : "READ",
+				slot, ice_set.encr_bypass, bypass,
+				ice_set.decr_bypass,
+				ice_set.crypto_data.key_index);
+	}
+
+	/* Configure ICE index */
+	ctrl_info_val =
+		(ice_set.crypto_data.key_index &
+		 MASK_SDHCI_MSM_ICE_CTRL_INFO_KEY_INDEX)
+		 << OFFSET_SDHCI_MSM_ICE_CTRL_INFO_KEY_INDEX;
+
+	/* Configure data unit size of transfer request */
+	ctrl_info_val |=
+		(SDHCI_MSM_ICE_TR_DATA_UNIT_512_B &
+		 MASK_SDHCI_MSM_ICE_CTRL_INFO_CDU)
+		 << OFFSET_SDHCI_MSM_ICE_CTRL_INFO_CDU;
+
+	/* Configure ICE bypass mode */
+	ctrl_info_val |=
+		(bypass & MASK_SDHCI_MSM_ICE_CTRL_INFO_BYPASS)
+		 << OFFSET_SDHCI_MSM_ICE_CTRL_INFO_BYPASS;
+
+	writel_relaxed((lba & 0xFFFFFFFF),
+		host->ioaddr + CORE_VENDOR_SPEC_ICE_CTRL_INFO_1_n + 16 * slot);
+	writel_relaxed(((lba >> 32) & 0xFFFFFFFF),
+		host->ioaddr + CORE_VENDOR_SPEC_ICE_CTRL_INFO_2_n + 16 * slot);
+	writel_relaxed(ctrl_info_val,
+		host->ioaddr + CORE_VENDOR_SPEC_ICE_CTRL_INFO_3_n + 16 * slot);
+
+	/* Ensure ICE registers are configured before issuing SDHCI request */
+	mb();
+	return 0;
+}
+
+int sdhci_msm_ice_reset(struct sdhci_host *host)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	int err = 0;
+
+	if (msm_host->ice.state != SDHCI_MSM_ICE_STATE_ACTIVE) {
+		pr_err("%s: ice is in invalid state before reset %d\n",
+			mmc_hostname(host->mmc), msm_host->ice.state);
+		return -EINVAL;
+	}
+
+	init_completion(&msm_host->ice.async_done);
+
+	if (msm_host->ice.vops->reset) {
+		err = msm_host->ice.vops->reset(msm_host->ice.pdev);
+		if (err) {
+			pr_err("%s: ice reset failed %d\n",
+					mmc_hostname(host->mmc), err);
+			return err;
+		}
+	}
+
+	if (!wait_for_completion_timeout(&msm_host->ice.async_done,
+	     msecs_to_jiffies(SDHCI_MSM_ICE_COMPLETION_TIMEOUT_MS))) {
+		pr_err("%s: ice reset timedout after %d ms\n",
+			mmc_hostname(host->mmc),
+			SDHCI_MSM_ICE_COMPLETION_TIMEOUT_MS);
+		sdhci_msm_ice_print_regs(host);
+		return -ETIMEDOUT;
+	}
+
+	if (msm_host->ice.state != SDHCI_MSM_ICE_STATE_ACTIVE) {
+		pr_err("%s: ice is in invalid state after reset %d\n",
+			mmc_hostname(host->mmc), msm_host->ice.state);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+int sdhci_msm_ice_resume(struct sdhci_host *host)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	int err = 0;
+
+	if (msm_host->ice.state !=
+			SDHCI_MSM_ICE_STATE_SUSPENDED) {
+		pr_err("%s: ice is in invalid state before resume %d\n",
+			mmc_hostname(host->mmc), msm_host->ice.state);
+		return -EINVAL;
+	}
+
+	init_completion(&msm_host->ice.async_done);
+
+	if (msm_host->ice.vops->resume) {
+		err = msm_host->ice.vops->resume(msm_host->ice.pdev);
+		if (err) {
+			pr_err("%s: ice resume failed %d\n",
+					mmc_hostname(host->mmc), err);
+			return err;
+		}
+	}
+
+	if (!wait_for_completion_timeout(&msm_host->ice.async_done,
+		msecs_to_jiffies(SDHCI_MSM_ICE_COMPLETION_TIMEOUT_MS))) {
+		pr_err("%s: ice resume timedout after %d ms\n",
+			mmc_hostname(host->mmc),
+			SDHCI_MSM_ICE_COMPLETION_TIMEOUT_MS);
+		sdhci_msm_ice_print_regs(host);
+		return -ETIMEDOUT;
+	}
+
+	if (msm_host->ice.state != SDHCI_MSM_ICE_STATE_ACTIVE) {
+		pr_err("%s: ice is in invalid state after resume %d\n",
+			mmc_hostname(host->mmc), msm_host->ice.state);
+		return -EINVAL;
+	}
+	return 0;
+}
+
+int sdhci_msm_ice_suspend(struct sdhci_host *host)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	int err = 0;
+
+	if (msm_host->ice.state !=
+			SDHCI_MSM_ICE_STATE_ACTIVE) {
+		pr_err("%s: ice is in invalid state before resume %d\n",
+			mmc_hostname(host->mmc), msm_host->ice.state);
+		return -EINVAL;
+	}
+
+	if (msm_host->ice.vops->suspend) {
+		err = msm_host->ice.vops->suspend(msm_host->ice.pdev);
+		if (err) {
+			pr_err("%s: ice suspend failed %d\n",
+					mmc_hostname(host->mmc), err);
+			return -EINVAL;
+		}
+	}
+	msm_host->ice.state = SDHCI_MSM_ICE_STATE_SUSPENDED;
+	return 0;
+}
+
+int sdhci_msm_ice_get_status(struct sdhci_host *host, int *ice_status)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	int stat = -EINVAL;
+
+	if (msm_host->ice.state != SDHCI_MSM_ICE_STATE_ACTIVE) {
+		pr_err("%s: ice is in invalid state %d\n",
+			mmc_hostname(host->mmc), msm_host->ice.state);
+		return -EINVAL;
+	}
+
+	if (msm_host->ice.vops->status) {
+		*ice_status = 0;
+		stat = msm_host->ice.vops->status(msm_host->ice.pdev);
+		if (stat < 0) {
+			pr_err("%s: ice get sts failed %d\n",
+					mmc_hostname(host->mmc), stat);
+			return -EINVAL;
+		}
+		*ice_status = stat;
+	}
+	return 0;
+}
+
+void sdhci_msm_ice_print_regs(struct sdhci_host *host)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+
+	if (msm_host->ice.vops->debug)
+		msm_host->ice.vops->debug(msm_host->ice.pdev);
+}
diff --git a/drivers/mmc/host/sdhci-msm-ice.h b/drivers/mmc/host/sdhci-msm-ice.h
new file mode 100644
index 0000000..88ef0e2
--- /dev/null
+++ b/drivers/mmc/host/sdhci-msm-ice.h
@@ -0,0 +1,150 @@
+/*
+ * Copyright (c) 2015, 2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __SDHCI_MSM_ICE_H__
+#define __SDHCI_MSM_ICE_H__
+
+#include <linux/io.h>
+#include <linux/of.h>
+#include <linux/async.h>
+#include <linux/blkdev.h>
+#include <crypto/ice.h>
+
+#include "sdhci-msm.h"
+
+#define SDHC_MSM_CRYPTO_LABEL "sdhc-msm-crypto"
+/* Timeout waiting for ICE initialization, that requires TZ access */
+#define SDHCI_MSM_ICE_COMPLETION_TIMEOUT_MS	500
+
+/*
+ * SDHCI host controller ICE registers. There are n [0..31]
+ * of each of these registers
+ */
+#define NUM_SDHCI_MSM_ICE_CTRL_INFO_n_REGS	32
+
+#define CORE_VENDOR_SPEC_ICE_CTRL		0x300
+#define CORE_VENDOR_SPEC_ICE_CTRL_INFO_1_n	0x304
+#define CORE_VENDOR_SPEC_ICE_CTRL_INFO_2_n	0x308
+#define CORE_VENDOR_SPEC_ICE_CTRL_INFO_3_n	0x30C
+
+/* ICE3.0 register which got added cmdq reg space */
+#define ICE_CQ_CAPABILITIES	0x04
+#define ICE_HCI_SUPPORT		(1 << 28)
+#define ICE_CQ_CONFIG		0x08
+#define CRYPTO_GENERAL_ENABLE	(1 << 1)
+
+/* ICE3.0 register which got added hc reg space */
+#define HC_VENDOR_SPECIFIC_FUNC4	0x260
+#define DISABLE_CRYPTO			(1 << 15)
+#define HC_VENDOR_SPECIFIC_ICE_CTRL	0x800
+#define ICE_SW_RST_EN			(1 << 0)
+
+/* SDHCI MSM ICE CTRL Info register offset */
+enum {
+	OFFSET_SDHCI_MSM_ICE_CTRL_INFO_BYPASS     = 0,
+	OFFSET_SDHCI_MSM_ICE_CTRL_INFO_KEY_INDEX  = 0x1,
+	OFFSET_SDHCI_MSM_ICE_CTRL_INFO_CDU        = 0x6,
+};
+
+/* SDHCI MSM ICE CTRL Info register masks */
+enum {
+	MASK_SDHCI_MSM_ICE_CTRL_INFO_BYPASS     = 0x1,
+	MASK_SDHCI_MSM_ICE_CTRL_INFO_KEY_INDEX  = 0x1F,
+	MASK_SDHCI_MSM_ICE_CTRL_INFO_CDU        = 0x7,
+};
+
+/* SDHCI MSM ICE encryption/decryption bypass state */
+enum {
+	SDHCI_MSM_ICE_DISABLE_BYPASS  = 0,
+	SDHCI_MSM_ICE_ENABLE_BYPASS = 1,
+};
+
+/* SDHCI MSM ICE Crypto Data Unit of target DUN of Transfer Request */
+enum {
+	SDHCI_MSM_ICE_TR_DATA_UNIT_512_B          = 0,
+	SDHCI_MSM_ICE_TR_DATA_UNIT_1_KB           = 1,
+	SDHCI_MSM_ICE_TR_DATA_UNIT_2_KB           = 2,
+	SDHCI_MSM_ICE_TR_DATA_UNIT_4_KB           = 3,
+	SDHCI_MSM_ICE_TR_DATA_UNIT_8_KB           = 4,
+	SDHCI_MSM_ICE_TR_DATA_UNIT_16_KB          = 5,
+	SDHCI_MSM_ICE_TR_DATA_UNIT_32_KB          = 6,
+	SDHCI_MSM_ICE_TR_DATA_UNIT_64_KB          = 7,
+};
+
+/* SDHCI MSM ICE internal state */
+enum {
+	SDHCI_MSM_ICE_STATE_DISABLED   = 0,
+	SDHCI_MSM_ICE_STATE_ACTIVE     = 1,
+	SDHCI_MSM_ICE_STATE_SUSPENDED  = 2,
+};
+
+/* crypto context fields in cmdq data command task descriptor */
+#define DATA_UNIT_NUM(x)	(((u64)(x) & 0xFFFFFFFF) << 0)
+#define CRYPTO_CONFIG_INDEX(x)	(((u64)(x) & 0xFF) << 32)
+#define CRYPTO_ENABLE(x)	(((u64)(x) & 0x1) << 47)
+
+#ifdef CONFIG_MMC_SDHCI_MSM_ICE
+int sdhci_msm_ice_get_dev(struct sdhci_host *host);
+int sdhci_msm_ice_init(struct sdhci_host *host);
+int sdhci_msm_ice_cfg(struct sdhci_host *host, struct mmc_request *mrq,
+			u32 slot);
+int sdhci_msm_ice_reset(struct sdhci_host *host);
+int sdhci_msm_ice_resume(struct sdhci_host *host);
+int sdhci_msm_ice_suspend(struct sdhci_host *host);
+int sdhci_msm_ice_get_status(struct sdhci_host *host, int *ice_status);
+void sdhci_msm_ice_print_regs(struct sdhci_host *host);
+#else
+inline int sdhci_msm_ice_get_dev(struct sdhci_host *host)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+
+	if (msm_host) {
+		msm_host->ice.pdev = NULL;
+		msm_host->ice.vops = NULL;
+	}
+	return -ENODEV;
+}
+inline int sdhci_msm_ice_init(struct sdhci_host *host)
+{
+	return 0;
+}
+inline int sdhci_msm_ice_cfg(struct sdhci_host *host,
+		struct mmc_request *mrq, u32 slot)
+{
+	return 0;
+}
+inline int sdhci_msm_ice_reset(struct sdhci_host *host)
+{
+	return 0;
+}
+inline int sdhci_msm_ice_resume(struct sdhci_host *host)
+{
+	return 0;
+}
+inline int sdhci_msm_ice_suspend(struct sdhci_host *host)
+{
+	return 0;
+}
+inline int sdhci_msm_ice_get_status(struct sdhci_host *host,
+				   int *ice_status)
+{
+	return 0;
+}
+inline void sdhci_msm_ice_print_regs(struct sdhci_host *host)
+{
+	return;
+}
+#endif /* CONFIG_MMC_SDHCI_MSM_ICE */
+#endif /* __SDHCI_MSM_ICE_H__ */
diff --git a/drivers/mmc/host/sdhci-msm.c b/drivers/mmc/host/sdhci-msm.c
index c7f88a5..bcf33b8 100644
--- a/drivers/mmc/host/sdhci-msm.c
+++ b/drivers/mmc/host/sdhci-msm.c
@@ -2,7 +2,7 @@
  * drivers/mmc/host/sdhci-msm.c - Qualcomm Technologies, Inc. MSM SDHCI Platform
  * driver source file
  *
- * Copyright (c) 2012-2014, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2012-2017, The Linux Foundation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License version 2 and
@@ -21,6 +21,7 @@
 #include <linux/mmc/sdio_func.h>
 #include <linux/gfp.h>
 #include <linux/of.h>
+#include <linux/of_device.h>
 #include <linux/of_gpio.h>
 #include <linux/regulator/consumer.h>
 #include <linux/types.h>
@@ -31,18 +32,34 @@
 #include <linux/delay.h>
 #include <linux/scatterlist.h>
 #include <linux/slab.h>
-#include <linux/mmc/mmc.h>
 #include <linux/mmc/slot-gpio.h>
 #include <linux/dma-mapping.h>
 #include <linux/iopoll.h>
 #include <linux/pinctrl/consumer.h>
 #include <linux/iopoll.h>
 #include <linux/msm-bus.h>
+#include <linux/pm_runtime.h>
+#include <trace/events/mmc.h>
 
-#include "sdhci-pltfm.h"
+#include "sdhci-msm.h"
+#include "cmdq_hci.h"
+
+#define QOS_REMOVE_DELAY_MS	10
+#define CORE_POWER		0x0
+#define CORE_SW_RST		(1 << 7)
 
 #define SDHCI_VER_100		0x2B
 
+#define CORE_VERSION_STEP_MASK		0x0000FFFF
+#define CORE_VERSION_MINOR_MASK		0x0FFF0000
+#define CORE_VERSION_MINOR_SHIFT	16
+#define CORE_VERSION_MAJOR_MASK		0xF0000000
+#define CORE_VERSION_MAJOR_SHIFT	28
+#define CORE_VERSION_TARGET_MASK	0x000000FF
+#define SDHCI_MSM_VER_420               0x49
+
+#define SWITCHABLE_SIGNALLING_VOL	(1 << 29)
+
 #define CORE_VERSION_MAJOR_MASK		0xF0000000
 #define CORE_VERSION_MAJOR_SHIFT	28
 
@@ -50,22 +67,6 @@
 #define HC_MODE_EN		0x1
 #define FF_CLK_SW_RST_DIS	(1 << 13)
 
-#define CORE_GENERICS		0x70
-#define SWITCHABLE_SIGNALLING_VOL (1 << 29)
-
-#define CORE_POWER		0x0
-#define CORE_SW_RST		(1 << 7)
-
-#define CORE_MCI_VERSION	0x050
-#define CORE_TESTBUS_CONFIG	0x0CC
-#define CORE_TESTBUS_ENA	(1 << 3)
-#define CORE_SDCC_DEBUG_REG	0x124
-
-#define CORE_PWRCTL_STATUS	0xDC
-#define CORE_PWRCTL_MASK	0xE0
-#define CORE_PWRCTL_CLEAR	0xE4
-#define CORE_PWRCTL_CTL		0xE8
-
 #define CORE_PWRCTL_BUS_OFF	0x01
 #define CORE_PWRCTL_BUS_ON	(1 << 1)
 #define CORE_PWRCTL_IO_LOW	(1 << 2)
@@ -79,7 +80,6 @@
 #define INT_MASK		0xF
 #define MAX_PHASES		16
 
-#define CORE_DLL_CONFIG		0x100
 #define CORE_CMD_DAT_TRACK_SEL	(1 << 0)
 #define CORE_DLL_EN		(1 << 16)
 #define CORE_CDR_EN		(1 << 17)
@@ -88,30 +88,30 @@
 #define CORE_DLL_PDN		(1 << 29)
 #define CORE_DLL_RST		(1 << 30)
 
-#define CORE_DLL_STATUS		0x108
 #define CORE_DLL_LOCK		(1 << 7)
 #define CORE_DDR_DLL_LOCK	(1 << 11)
 
-#define CORE_VENDOR_SPEC	0x10C
-#define CORE_CLK_PWRSAVE	(1 << 1)
-#define CORE_HC_MCLK_SEL_DFLT	(2 << 8)
-#define CORE_HC_MCLK_SEL_HS400	(3 << 8)
-#define CORE_HC_MCLK_SEL_MASK	(3 << 8)
-#define CORE_HC_AUTO_CMD21_EN	(1 << 6)
+#define CORE_CLK_PWRSAVE		(1 << 1)
+#define CORE_HC_MCLK_SEL_DFLT		(2 << 8)
+#define CORE_HC_MCLK_SEL_HS400		(3 << 8)
+#define CORE_HC_MCLK_SEL_MASK		(3 << 8)
+#define CORE_HC_AUTO_CMD21_EN		(1 << 6)
+#define CORE_IO_PAD_PWR_SWITCH_EN	(1 << 15)
 #define CORE_IO_PAD_PWR_SWITCH	(1 << 16)
 #define CORE_HC_SELECT_IN_EN	(1 << 18)
 #define CORE_HC_SELECT_IN_HS400	(6 << 19)
 #define CORE_HC_SELECT_IN_MASK	(7 << 19)
+#define CORE_VENDOR_SPEC_POR_VAL	0xA1C
 
-#define CORE_VENDOR_SPEC_CAPABILITIES0	0x11C
+#define HC_SW_RST_WAIT_IDLE_DIS	(1 << 20)
+#define HC_SW_RST_REQ (1 << 21)
+#define CORE_ONE_MID_EN     (1 << 25)
+
 #define CORE_8_BIT_SUPPORT		(1 << 18)
 #define CORE_3_3V_SUPPORT		(1 << 24)
 #define CORE_3_0V_SUPPORT		(1 << 25)
 #define CORE_1_8V_SUPPORT		(1 << 26)
-#define CORE_SYS_BUS_SUPPORT_64_BIT	28
-
-#define CORE_VENDOR_SPEC_ADMA_ERR_ADDR0	0x114
-#define CORE_VENDOR_SPEC_ADMA_ERR_ADDR1	0x118
+#define CORE_SYS_BUS_SUPPORT_64_BIT	BIT(28)
 
 #define CORE_CSR_CDC_CTLR_CFG0		0x130
 #define CORE_SW_TRIG_FULL_CALIB		(1 << 16)
@@ -132,42 +132,189 @@
 
 #define CORE_CDC_ERROR_CODE_MASK	0x7000000
 
+#define CQ_CMD_DBG_RAM	                0x110
+#define CQ_CMD_DBG_RAM_WA               0x150
+#define CQ_CMD_DBG_RAM_OL               0x154
+
 #define CORE_CSR_CDC_GEN_CFG		0x178
 #define CORE_CDC_SWITCH_BYPASS_OFF	(1 << 0)
 #define CORE_CDC_SWITCH_RC_EN		(1 << 1)
 
-#define CORE_DDR_200_CFG		0x184
 #define CORE_CDC_T4_DLY_SEL		(1 << 0)
+#define CORE_CMDIN_RCLK_EN		(1 << 1)
 #define CORE_START_CDC_TRAFFIC		(1 << 6)
-#define CORE_VENDOR_SPEC3	0x1B0
+
 #define CORE_PWRSAVE_DLL	(1 << 3)
+#define CORE_CMDEN_HS400_INPUT_MASK_CNT (1 << 13)
 
-#define CORE_DLL_CONFIG_2	0x1B4
 #define CORE_DDR_CAL_EN		(1 << 0)
+#define CORE_FLL_CYCLE_CNT	(1 << 18)
+#define CORE_DLL_CLOCK_DISABLE	(1 << 21)
 
-#define CORE_DDR_CONFIG		0x1B8
-#define DDR_CONFIG_POR_VAL	0x80040853
-
-
-#define CORE_MCI_DATA_CNT 0x30
-#define CORE_MCI_STATUS 0x34
-#define CORE_MCI_FIFO_CNT 0x44
-
-#define CORE_TESTBUS_SEL2_BIT	4
-#define CORE_TESTBUS_SEL2	(1 << CORE_TESTBUS_SEL2_BIT)
+#define DDR_CONFIG_POR_VAL		0x80040853
+#define DDR_CONFIG_PRG_RCLK_DLY_MASK	0x1FF
+#define DDR_CONFIG_PRG_RCLK_DLY		115
+#define DDR_CONFIG_2_POR_VAL		0x80040873
 
 /* 512 descriptors */
 #define SDHCI_MSM_MAX_SEGMENTS  (1 << 9)
 #define SDHCI_MSM_MMC_CLK_GATE_DELAY	200 /* msecs */
 
 #define CORE_FREQ_100MHZ	(100 * 1000 * 1000)
+#define TCXO_FREQ		19200000
 
 #define INVALID_TUNING_PHASE	-1
-
-#define CORE_VERSION_TARGET_MASK	0x000000FF
+#define sdhci_is_valid_gpio_wakeup_int(_h) ((_h)->pdata->sdiowakeup_irq >= 0)
 
 #define NUM_TUNING_PHASES		16
-#define MAX_DRV_TYPES_SUPPORTED_HS200	3
+#define MAX_DRV_TYPES_SUPPORTED_HS200	4
+#define MSM_AUTOSUSPEND_DELAY_MS 100
+
+struct sdhci_msm_offset {
+	u32 CORE_MCI_DATA_CNT;
+	u32 CORE_MCI_STATUS;
+	u32 CORE_MCI_FIFO_CNT;
+	u32 CORE_MCI_VERSION;
+	u32 CORE_GENERICS;
+	u32 CORE_TESTBUS_CONFIG;
+	u32 CORE_TESTBUS_SEL2_BIT;
+	u32 CORE_TESTBUS_ENA;
+	u32 CORE_TESTBUS_SEL2;
+	u32 CORE_PWRCTL_STATUS;
+	u32 CORE_PWRCTL_MASK;
+	u32 CORE_PWRCTL_CLEAR;
+	u32 CORE_PWRCTL_CTL;
+	u32 CORE_SDCC_DEBUG_REG;
+	u32 CORE_DLL_CONFIG;
+	u32 CORE_DLL_STATUS;
+	u32 CORE_VENDOR_SPEC;
+	u32 CORE_VENDOR_SPEC_ADMA_ERR_ADDR0;
+	u32 CORE_VENDOR_SPEC_ADMA_ERR_ADDR1;
+	u32 CORE_VENDOR_SPEC_FUNC2;
+	u32 CORE_VENDOR_SPEC_CAPABILITIES0;
+	u32 CORE_DDR_200_CFG;
+	u32 CORE_VENDOR_SPEC3;
+	u32 CORE_DLL_CONFIG_2;
+	u32 CORE_DDR_CONFIG;
+	u32 CORE_DDR_CONFIG_2;
+};
+
+struct sdhci_msm_offset sdhci_msm_offset_mci_removed = {
+	.CORE_MCI_DATA_CNT = 0x35C,
+	.CORE_MCI_STATUS = 0x324,
+	.CORE_MCI_FIFO_CNT = 0x308,
+	.CORE_MCI_VERSION = 0x318,
+	.CORE_GENERICS = 0x320,
+	.CORE_TESTBUS_CONFIG = 0x32C,
+	.CORE_TESTBUS_SEL2_BIT = 3,
+	.CORE_TESTBUS_ENA = (1 << 31),
+	.CORE_TESTBUS_SEL2 = (1 << 3),
+	.CORE_PWRCTL_STATUS = 0x240,
+	.CORE_PWRCTL_MASK = 0x244,
+	.CORE_PWRCTL_CLEAR = 0x248,
+	.CORE_PWRCTL_CTL = 0x24C,
+	.CORE_SDCC_DEBUG_REG = 0x358,
+	.CORE_DLL_CONFIG = 0x200,
+	.CORE_DLL_STATUS = 0x208,
+	.CORE_VENDOR_SPEC = 0x20C,
+	.CORE_VENDOR_SPEC_ADMA_ERR_ADDR0 = 0x214,
+	.CORE_VENDOR_SPEC_ADMA_ERR_ADDR1 = 0x218,
+	.CORE_VENDOR_SPEC_FUNC2 = 0x210,
+	.CORE_VENDOR_SPEC_CAPABILITIES0 = 0x21C,
+	.CORE_DDR_200_CFG = 0x224,
+	.CORE_VENDOR_SPEC3 = 0x250,
+	.CORE_DLL_CONFIG_2 = 0x254,
+	.CORE_DDR_CONFIG = 0x258,
+	.CORE_DDR_CONFIG_2 = 0x25C,
+};
+
+struct sdhci_msm_offset sdhci_msm_offset_mci_present = {
+	.CORE_MCI_DATA_CNT = 0x30,
+	.CORE_MCI_STATUS = 0x34,
+	.CORE_MCI_FIFO_CNT = 0x44,
+	.CORE_MCI_VERSION = 0x050,
+	.CORE_GENERICS = 0x70,
+	.CORE_TESTBUS_CONFIG = 0x0CC,
+	.CORE_TESTBUS_SEL2_BIT = 4,
+	.CORE_TESTBUS_ENA = (1 << 3),
+	.CORE_TESTBUS_SEL2 = (1 << 4),
+	.CORE_PWRCTL_STATUS = 0xDC,
+	.CORE_PWRCTL_MASK = 0xE0,
+	.CORE_PWRCTL_CLEAR = 0xE4,
+	.CORE_PWRCTL_CTL = 0xE8,
+	.CORE_SDCC_DEBUG_REG = 0x124,
+	.CORE_DLL_CONFIG = 0x100,
+	.CORE_DLL_STATUS = 0x108,
+	.CORE_VENDOR_SPEC = 0x10C,
+	.CORE_VENDOR_SPEC_ADMA_ERR_ADDR0 = 0x114,
+	.CORE_VENDOR_SPEC_ADMA_ERR_ADDR1 = 0x118,
+	.CORE_VENDOR_SPEC_FUNC2 = 0x110,
+	.CORE_VENDOR_SPEC_CAPABILITIES0 = 0x11C,
+	.CORE_DDR_200_CFG = 0x184,
+	.CORE_VENDOR_SPEC3 = 0x1B0,
+	.CORE_DLL_CONFIG_2 = 0x1B4,
+	.CORE_DDR_CONFIG = 0x1B8,
+	.CORE_DDR_CONFIG_2 = 0x1BC,
+};
+
+u8 sdhci_msm_readb_relaxed(struct sdhci_host *host, u32 offset)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	void __iomem *base_addr;
+
+	if (msm_host->mci_removed)
+		base_addr = host->ioaddr;
+	else
+		base_addr = msm_host->core_mem;
+
+	return readb_relaxed(base_addr + offset);
+}
+
+u32 sdhci_msm_readl_relaxed(struct sdhci_host *host, u32 offset)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	void __iomem *base_addr;
+
+	if (msm_host->mci_removed)
+		base_addr = host->ioaddr;
+	else
+		base_addr = msm_host->core_mem;
+
+	return readl_relaxed(base_addr + offset);
+}
+
+void sdhci_msm_writeb_relaxed(u8 val, struct sdhci_host *host, u32 offset)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	void __iomem *base_addr;
+
+	if (msm_host->mci_removed)
+		base_addr = host->ioaddr;
+	else
+		base_addr = msm_host->core_mem;
+
+	writeb_relaxed(val, base_addr + offset);
+}
+
+void sdhci_msm_writel_relaxed(u32 val, struct sdhci_host *host, u32 offset)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	void __iomem *base_addr;
+
+	if (msm_host->mci_removed)
+		base_addr = host->ioaddr;
+	else
+		base_addr = msm_host->core_mem;
+
+	writel_relaxed(val, base_addr + offset);
+}
+
+/* Timeout value to avoid infinite waiting for pwr_irq */
+#define MSM_PWR_IRQ_TIMEOUT_MS 5000
 
 static const u32 tuning_block_64[] = {
 	0x00FF0FFF, 0xCCC3CCFF, 0xFFCC3CC3, 0xEFFEFFFE,
@@ -187,132 +334,15 @@
 	0xFFFFBBBB, 0xFFFF77FF, 0xFF7777FF, 0xEEDDBB77
 };
 
+/* global to hold each slot instance for debug */
+static struct sdhci_msm_host *sdhci_slot[2];
+
 static int disable_slots;
 /* root can write, others read */
 module_param(disable_slots, int, S_IRUGO|S_IWUSR);
 
-/* This structure keeps information per regulator */
-struct sdhci_msm_reg_data {
-	/* voltage regulator handle */
-	struct regulator *reg;
-	/* regulator name */
-	const char *name;
-	/* voltage level to be set */
-	u32 low_vol_level;
-	u32 high_vol_level;
-	/* Load values for low power and high power mode */
-	u32 lpm_uA;
-	u32 hpm_uA;
-
-	/* is this regulator enabled? */
-	bool is_enabled;
-	/* is this regulator needs to be always on? */
-	bool is_always_on;
-	/* is low power mode setting required for this regulator? */
-	bool lpm_sup;
-	bool set_voltage_sup;
-};
-
-#define MSM_MMC_DEFAULT_CPU_DMA_LATENCY 200 /* usecs */
-/*
- * This structure keeps information for all the
- * regulators required for a SDCC slot.
- */
-struct sdhci_msm_slot_reg_data {
-	/* keeps VDD/VCC regulator info */
-	struct sdhci_msm_reg_data *vdd_data;
-	 /* keeps VDD IO regulator info */
-	struct sdhci_msm_reg_data *vdd_io_data;
-};
-
-struct sdhci_msm_gpio {
-	u32 no;
-	const char *name;
-	bool is_enabled;
-};
-
-struct sdhci_msm_gpio_data {
-	struct sdhci_msm_gpio *gpio;
-	u8 size;
-};
-
-struct sdhci_msm_pin_data {
-	/*
-	 * = 1 if controller pins are using gpios
-	 * = 0 if controller has dedicated MSM pads
-	 */
-	u8 is_gpio;
-	struct sdhci_msm_gpio_data *gpio_data;
-};
-
-struct sdhci_pinctrl_data {
-	struct pinctrl          *pctrl;
-	struct pinctrl_state    *pins_active;
-	struct pinctrl_state    *pins_sleep;
-};
-
-struct sdhci_msm_bus_voting_data {
-	struct msm_bus_scale_pdata *bus_pdata;
-	unsigned int *bw_vecs;
-	unsigned int bw_vecs_size;
-};
-
-struct sdhci_msm_pltfm_data {
-	/* Supported UHS-I Modes */
-	u32 caps;
-
-	/* More capabilities */
-	u32 caps2;
-
-	unsigned long mmc_bus_width;
-	struct sdhci_msm_slot_reg_data *vreg_data;
-	bool nonremovable;
-	bool pin_cfg_sts;
-	struct sdhci_msm_pin_data *pin_data;
-	struct sdhci_pinctrl_data *pctrl_data;
-	u32 cpu_dma_latency_us;
-	int status_gpio; /* card detection GPIO that is configured as IRQ */
-	struct sdhci_msm_bus_voting_data *voting_data;
-	u32 *sup_clk_table;
-	unsigned char sup_clk_cnt;
-};
-
-struct sdhci_msm_bus_vote {
-	uint32_t client_handle;
-	uint32_t curr_vote;
-	int min_bw_vote;
-	int max_bw_vote;
-	bool is_max_bw_needed;
-	struct delayed_work vote_work;
-	struct device_attribute max_bus_bw;
-};
-
-struct sdhci_msm_host {
-	struct platform_device	*pdev;
-	void __iomem *core_mem;    /* MSM SDCC mapped address */
-	struct clk	 *clk;     /* main SD/MMC bus clock */
-	struct clk	 *pclk;    /* SDHC peripheral bus clock */
-	struct clk	 *bus_clk; /* SDHC bus voter clock */
-	struct clk	 *ff_clk; /* CDC calibration fixed feedback clock */
-	struct clk	 *sleep_clk; /* CDC calibration sleep clock */
-	atomic_t clks_on; /* Set if clocks are enabled */
-	struct sdhci_msm_pltfm_data *pdata;
-	struct mmc_host  *mmc;
-	struct sdhci_pltfm_data sdhci_msm_pdata;
-	u32 curr_pwr_state;
-	u32 curr_io_level;
-	struct completion pwr_irq_completion;
-	struct sdhci_msm_bus_vote msm_bus_vote;
-	struct device_attribute	polling;
-	u32 clk_rate; /* Keeps track of current clock rate that is set */
-	bool tuning_done;
-	bool calibration_done;
-	u8 saved_tuning_phase;
-	bool en_auto_cmd21;
-	struct device_attribute auto_cmd21_attr;
-	atomic_t controller_clock;
-	bool use_cdclp533;
-};
+static bool nocmdq;
+module_param(nocmdq, bool, S_IRUGO|S_IWUSR);
 
 enum vdd_io_level {
 	/* set vdd_io_data->low_vol_level */
@@ -334,10 +364,14 @@
 	u32 wait_cnt = 50;
 	u8 ck_out_en = 0;
 	struct mmc_host *mmc = host->mmc;
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	const struct sdhci_msm_offset *msm_host_offset =
+					msm_host->offset;
 
 	/* poll for CK_OUT_EN bit.  max. poll time = 50us */
-	ck_out_en = !!(readl_relaxed(host->ioaddr + CORE_DLL_CONFIG) &
-			CORE_CK_OUT_EN);
+	ck_out_en = !!(readl_relaxed(host->ioaddr +
+		msm_host_offset->CORE_DLL_CONFIG) & CORE_CK_OUT_EN);
 
 	while (ck_out_en != poll) {
 		if (--wait_cnt == 0) {
@@ -349,7 +383,7 @@
 		udelay(1);
 
 		ck_out_en = !!(readl_relaxed(host->ioaddr +
-				CORE_DLL_CONFIG) & CORE_CK_OUT_EN);
+			msm_host_offset->CORE_DLL_CONFIG) & CORE_CK_OUT_EN);
 	}
 out:
 	return rc;
@@ -363,18 +397,25 @@
 {
 	int rc = 0;
 	u32 config;
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	const struct sdhci_msm_offset *msm_host_offset =
+					msm_host->offset;
 
-	config = readl_relaxed(host->ioaddr + CORE_DLL_CONFIG);
+	config = readl_relaxed(host->ioaddr +
+		msm_host_offset->CORE_DLL_CONFIG);
 	config |= CORE_CDR_EN;
 	config &= ~(CORE_CDR_EXT_EN | CORE_CK_OUT_EN);
-	writel_relaxed(config, host->ioaddr + CORE_DLL_CONFIG);
+	writel_relaxed(config, host->ioaddr +
+		msm_host_offset->CORE_DLL_CONFIG);
 
 	rc = msm_dll_poll_ck_out_en(host, 0);
 	if (rc)
 		goto err;
 
-	writel_relaxed((readl_relaxed(host->ioaddr + CORE_DLL_CONFIG) |
-			CORE_CK_OUT_EN), host->ioaddr + CORE_DLL_CONFIG);
+	writel_relaxed((readl_relaxed(host->ioaddr +
+		msm_host_offset->CORE_DLL_CONFIG) | CORE_CK_OUT_EN),
+		host->ioaddr + msm_host_offset->CORE_DLL_CONFIG);
 
 	rc = msm_dll_poll_ck_out_en(host, 1);
 	if (rc)
@@ -421,6 +462,8 @@
 	int rc = 0;
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
 	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	const struct sdhci_msm_offset *msm_host_offset =
+					msm_host->offset;
 	u32 val = 0;
 
 	if (!msm_host->en_auto_cmd21)
@@ -433,11 +476,13 @@
 
 	if (enable) {
 		rc = msm_enable_cdr_cm_sdc4_dll(host);
-		writel_relaxed(readl_relaxed(host->ioaddr + CORE_VENDOR_SPEC) |
-			       val, host->ioaddr + CORE_VENDOR_SPEC);
+		writel_relaxed(readl_relaxed(host->ioaddr +
+			msm_host_offset->CORE_VENDOR_SPEC) | val,
+			host->ioaddr + msm_host_offset->CORE_VENDOR_SPEC);
 	} else {
-		writel_relaxed(readl_relaxed(host->ioaddr + CORE_VENDOR_SPEC) &
-			       ~val, host->ioaddr + CORE_VENDOR_SPEC);
+		writel_relaxed(readl_relaxed(host->ioaddr +
+			msm_host_offset->CORE_VENDOR_SPEC) & ~val,
+			host->ioaddr + msm_host_offset->CORE_VENDOR_SPEC);
 	}
 	return rc;
 }
@@ -445,6 +490,10 @@
 static int msm_config_cm_dll_phase(struct sdhci_host *host, u8 phase)
 {
 	int rc = 0;
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	const struct sdhci_msm_offset *msm_host_offset =
+					msm_host->offset;
 	u8 grey_coded_phase_table[] = {0x0, 0x1, 0x3, 0x2, 0x6, 0x7, 0x5, 0x4,
 					0xC, 0xD, 0xF, 0xE, 0xA, 0xB, 0x9,
 					0x8};
@@ -455,10 +504,12 @@
 	pr_debug("%s: Enter %s\n", mmc_hostname(mmc), __func__);
 	spin_lock_irqsave(&host->lock, flags);
 
-	config = readl_relaxed(host->ioaddr + CORE_DLL_CONFIG);
+	config = readl_relaxed(host->ioaddr +
+		msm_host_offset->CORE_DLL_CONFIG);
 	config &= ~(CORE_CDR_EN | CORE_CK_OUT_EN);
 	config |= (CORE_CDR_EXT_EN | CORE_DLL_EN);
-	writel_relaxed(config, host->ioaddr + CORE_DLL_CONFIG);
+	writel_relaxed(config, host->ioaddr +
+		msm_host_offset->CORE_DLL_CONFIG);
 
 	/* Wait until CK_OUT_EN bit of DLL_CONFIG register becomes '0' */
 	rc = msm_dll_poll_ck_out_en(host, 0);
@@ -469,24 +520,28 @@
 	 * Write the selected DLL clock output phase (0 ... 15)
 	 * to CDR_SELEXT bit field of DLL_CONFIG register.
 	 */
-	writel_relaxed(((readl_relaxed(host->ioaddr + CORE_DLL_CONFIG)
+	writel_relaxed(((readl_relaxed(host->ioaddr +
+			msm_host_offset->CORE_DLL_CONFIG)
 			& ~(0xF << 20))
 			| (grey_coded_phase_table[phase] << 20)),
-			host->ioaddr + CORE_DLL_CONFIG);
+			host->ioaddr + msm_host_offset->CORE_DLL_CONFIG);
 
 	/* Set CK_OUT_EN bit of DLL_CONFIG register to 1. */
-	writel_relaxed((readl_relaxed(host->ioaddr + CORE_DLL_CONFIG)
-			| CORE_CK_OUT_EN), host->ioaddr + CORE_DLL_CONFIG);
+	writel_relaxed((readl_relaxed(host->ioaddr +
+		msm_host_offset->CORE_DLL_CONFIG) | CORE_CK_OUT_EN),
+		host->ioaddr + msm_host_offset->CORE_DLL_CONFIG);
 
 	/* Wait until CK_OUT_EN bit of DLL_CONFIG register becomes '1' */
 	rc = msm_dll_poll_ck_out_en(host, 1);
 	if (rc)
 		goto err_out;
 
-	config = readl_relaxed(host->ioaddr + CORE_DLL_CONFIG);
+	config = readl_relaxed(host->ioaddr +
+		msm_host_offset->CORE_DLL_CONFIG);
 	config |= CORE_CDR_EN;
 	config &= ~CORE_CDR_EXT_EN;
-	writel_relaxed(config, host->ioaddr + CORE_DLL_CONFIG);
+	writel_relaxed(config, host->ioaddr +
+		msm_host_offset->CORE_DLL_CONFIG);
 	goto out;
 
 err_out:
@@ -615,6 +670,10 @@
 static inline void msm_cm_dll_set_freq(struct sdhci_host *host)
 {
 	u32 mclk_freq = 0;
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	const struct sdhci_msm_offset *msm_host_offset =
+					msm_host->offset;
 
 	/* Program the MCLK value to MCLK_FREQ bit field */
 	if (host->clock <= 112000000)
@@ -634,14 +693,19 @@
 	else if (host->clock <= 200000000)
 		mclk_freq = 7;
 
-	writel_relaxed(((readl_relaxed(host->ioaddr + CORE_DLL_CONFIG)
+	writel_relaxed(((readl_relaxed(host->ioaddr +
+			msm_host_offset->CORE_DLL_CONFIG)
 			& ~(7 << 24)) | (mclk_freq << 24)),
-			host->ioaddr + CORE_DLL_CONFIG);
+			host->ioaddr + msm_host_offset->CORE_DLL_CONFIG);
 }
 
 /* Initialize the DLL (Programmable Delay Line ) */
 static int msm_init_cm_dll(struct sdhci_host *host)
 {
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	const struct sdhci_msm_offset *msm_host_offset =
+					msm_host->offset;
 	struct mmc_host *mmc = host->mmc;
 	int rc = 0;
 	unsigned long flags;
@@ -650,8 +714,8 @@
 
 	pr_debug("%s: Enter %s\n", mmc_hostname(mmc), __func__);
 	spin_lock_irqsave(&host->lock, flags);
-	prev_pwrsave = !!(readl_relaxed(host->ioaddr + CORE_VENDOR_SPEC) &
-			  CORE_CLK_PWRSAVE);
+	prev_pwrsave = !!(readl_relaxed(host->ioaddr +
+		msm_host_offset->CORE_VENDOR_SPEC) & CORE_CLK_PWRSAVE);
 	curr_pwrsave = prev_pwrsave;
 	/*
 	 * Make sure that clock is always enabled when DLL
@@ -660,41 +724,89 @@
 	 * here and re-enable it once tuning is completed.
 	 */
 	if (prev_pwrsave) {
-		writel_relaxed((readl_relaxed(host->ioaddr + CORE_VENDOR_SPEC)
-				& ~CORE_CLK_PWRSAVE),
-				host->ioaddr + CORE_VENDOR_SPEC);
+		writel_relaxed((readl_relaxed(host->ioaddr +
+			msm_host_offset->CORE_VENDOR_SPEC)
+			& ~CORE_CLK_PWRSAVE), host->ioaddr +
+			msm_host_offset->CORE_VENDOR_SPEC);
 		curr_pwrsave = false;
 	}
 
+	if (msm_host->use_updated_dll_reset) {
+		/* Disable the DLL clock */
+		writel_relaxed((readl_relaxed(host->ioaddr +
+				msm_host_offset->CORE_DLL_CONFIG)
+				& ~CORE_CK_OUT_EN), host->ioaddr +
+				msm_host_offset->CORE_DLL_CONFIG);
+
+		writel_relaxed((readl_relaxed(host->ioaddr +
+				msm_host_offset->CORE_DLL_CONFIG_2)
+				| CORE_DLL_CLOCK_DISABLE), host->ioaddr +
+				msm_host_offset->CORE_DLL_CONFIG_2);
+	}
+
 	/* Write 1 to DLL_RST bit of DLL_CONFIG register */
-	writel_relaxed((readl_relaxed(host->ioaddr + CORE_DLL_CONFIG)
-			| CORE_DLL_RST), host->ioaddr + CORE_DLL_CONFIG);
+	writel_relaxed((readl_relaxed(host->ioaddr +
+		msm_host_offset->CORE_DLL_CONFIG) | CORE_DLL_RST),
+		host->ioaddr + msm_host_offset->CORE_DLL_CONFIG);
 
 	/* Write 1 to DLL_PDN bit of DLL_CONFIG register */
-	writel_relaxed((readl_relaxed(host->ioaddr + CORE_DLL_CONFIG)
-			| CORE_DLL_PDN), host->ioaddr + CORE_DLL_CONFIG);
+	writel_relaxed((readl_relaxed(host->ioaddr +
+		msm_host_offset->CORE_DLL_CONFIG) | CORE_DLL_PDN),
+		host->ioaddr + msm_host_offset->CORE_DLL_CONFIG);
 	msm_cm_dll_set_freq(host);
 
+	if (msm_host->use_updated_dll_reset) {
+		u32 mclk_freq = 0;
+
+		if ((readl_relaxed(host->ioaddr +
+					msm_host_offset->CORE_DLL_CONFIG_2)
+					& CORE_FLL_CYCLE_CNT))
+			mclk_freq = (u32) ((host->clock / TCXO_FREQ) * 8);
+		else
+			mclk_freq = (u32) ((host->clock / TCXO_FREQ) * 4);
+
+		writel_relaxed(((readl_relaxed(host->ioaddr +
+			msm_host_offset->CORE_DLL_CONFIG_2)
+			& ~(0xFF << 10)) | (mclk_freq << 10)),
+			host->ioaddr + msm_host_offset->CORE_DLL_CONFIG_2);
+		/* wait for 5us before enabling DLL clock */
+		udelay(5);
+	}
+
 	/* Write 0 to DLL_RST bit of DLL_CONFIG register */
-	writel_relaxed((readl_relaxed(host->ioaddr + CORE_DLL_CONFIG)
-			& ~CORE_DLL_RST), host->ioaddr + CORE_DLL_CONFIG);
+	writel_relaxed((readl_relaxed(host->ioaddr +
+			msm_host_offset->CORE_DLL_CONFIG) & ~CORE_DLL_RST),
+			host->ioaddr + msm_host_offset->CORE_DLL_CONFIG);
 
 	/* Write 0 to DLL_PDN bit of DLL_CONFIG register */
-	writel_relaxed((readl_relaxed(host->ioaddr + CORE_DLL_CONFIG)
-			& ~CORE_DLL_PDN), host->ioaddr + CORE_DLL_CONFIG);
+	writel_relaxed((readl_relaxed(host->ioaddr +
+			msm_host_offset->CORE_DLL_CONFIG) & ~CORE_DLL_PDN),
+			host->ioaddr + msm_host_offset->CORE_DLL_CONFIG);
+
+	if (msm_host->use_updated_dll_reset) {
+		msm_cm_dll_set_freq(host);
+		/* Enable the DLL clock */
+		writel_relaxed((readl_relaxed(host->ioaddr +
+				msm_host_offset->CORE_DLL_CONFIG_2)
+				& ~CORE_DLL_CLOCK_DISABLE), host->ioaddr +
+				msm_host_offset->CORE_DLL_CONFIG_2);
+	}
 
 	/* Set DLL_EN bit to 1. */
-	writel_relaxed((readl_relaxed(host->ioaddr + CORE_DLL_CONFIG)
-			| CORE_DLL_EN), host->ioaddr + CORE_DLL_CONFIG);
+	writel_relaxed((readl_relaxed(host->ioaddr +
+			msm_host_offset->CORE_DLL_CONFIG) | CORE_DLL_EN),
+			host->ioaddr + msm_host_offset->CORE_DLL_CONFIG);
 
 	/* Set CK_OUT_EN bit to 1. */
-	writel_relaxed((readl_relaxed(host->ioaddr + CORE_DLL_CONFIG)
-			| CORE_CK_OUT_EN), host->ioaddr + CORE_DLL_CONFIG);
+	writel_relaxed((readl_relaxed(host->ioaddr +
+			msm_host_offset->CORE_DLL_CONFIG)
+			| CORE_CK_OUT_EN), host->ioaddr +
+			msm_host_offset->CORE_DLL_CONFIG);
 
 	wait_cnt = 50;
 	/* Wait until DLL_LOCK bit of DLL_STATUS register becomes '1' */
-	while (!(readl_relaxed(host->ioaddr + CORE_DLL_STATUS) &
-		CORE_DLL_LOCK)) {
+	while (!(readl_relaxed(host->ioaddr +
+		msm_host_offset->CORE_DLL_STATUS) & CORE_DLL_LOCK)) {
 		/* max. wait for 50us sec for LOCK bit to be set */
 		if (--wait_cnt == 0) {
 			pr_err("%s: %s: DLL failed to LOCK\n",
@@ -709,14 +821,16 @@
 out:
 	/* Restore the correct PWRSAVE state */
 	if (prev_pwrsave ^ curr_pwrsave) {
-		u32 reg = readl_relaxed(host->ioaddr + CORE_VENDOR_SPEC);
+		u32 reg = readl_relaxed(host->ioaddr +
+			msm_host_offset->CORE_VENDOR_SPEC);
 
 		if (prev_pwrsave)
 			reg |= CORE_CLK_PWRSAVE;
 		else
 			reg &= ~CORE_CLK_PWRSAVE;
 
-		writel_relaxed(reg, host->ioaddr + CORE_VENDOR_SPEC);
+		writel_relaxed(reg, host->ioaddr +
+			msm_host_offset->CORE_VENDOR_SPEC);
 	}
 
 	spin_unlock_irqrestore(&host->lock, flags);
@@ -729,13 +843,18 @@
 	u32 calib_done;
 	int ret = 0;
 	int cdc_err = 0;
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	const struct sdhci_msm_offset *msm_host_offset =
+					msm_host->offset;
 
 	pr_debug("%s: Enter %s\n", mmc_hostname(host->mmc), __func__);
 
 	/* Write 0 to CDC_T4_DLY_SEL field in VENDOR_SPEC_DDR200_CFG */
-	writel_relaxed((readl_relaxed(host->ioaddr + CORE_DDR_200_CFG)
+	writel_relaxed((readl_relaxed(host->ioaddr +
+			msm_host_offset->CORE_DDR_200_CFG)
 			& ~CORE_CDC_T4_DLY_SEL),
-			host->ioaddr + CORE_DDR_200_CFG);
+			host->ioaddr + msm_host_offset->CORE_DDR_200_CFG);
 
 	/* Write 0 to CDC_SWITCH_BYPASS_OFF field in CORE_CSR_CDC_GEN_CFG */
 	writel_relaxed((readl_relaxed(host->ioaddr + CORE_CSR_CDC_GEN_CFG)
@@ -748,9 +867,10 @@
 			host->ioaddr + CORE_CSR_CDC_GEN_CFG);
 
 	/* Write 0 to START_CDC_TRAFFIC field in CORE_DDR200_CFG */
-	writel_relaxed((readl_relaxed(host->ioaddr + CORE_DDR_200_CFG)
+	writel_relaxed((readl_relaxed(host->ioaddr +
+			msm_host_offset->CORE_DDR_200_CFG)
 			& ~CORE_START_CDC_TRAFFIC),
-			host->ioaddr + CORE_DDR_200_CFG);
+			host->ioaddr + msm_host_offset->CORE_DDR_200_CFG);
 
 	/*
 	 * Perform CDC Register Initialization Sequence
@@ -821,9 +941,10 @@
 	}
 
 	/* Write 1 to START_CDC_TRAFFIC field in CORE_DDR200_CFG */
-	writel_relaxed((readl_relaxed(host->ioaddr + CORE_DDR_200_CFG)
+	writel_relaxed((readl_relaxed(host->ioaddr +
+			msm_host_offset->CORE_DDR_200_CFG)
 			| CORE_START_CDC_TRAFFIC),
-			host->ioaddr + CORE_DDR_200_CFG);
+			host->ioaddr + msm_host_offset->CORE_DDR_200_CFG);
 out:
 	pr_debug("%s: Exit %s, ret:%d\n", mmc_hostname(host->mmc),
 			__func__, ret);
@@ -832,27 +953,45 @@
 
 static int sdhci_msm_cm_dll_sdc4_calibration(struct sdhci_host *host)
 {
-	u32 dll_status;
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	const struct sdhci_msm_offset *msm_host_offset =
+					msm_host->offset;
+	u32 dll_status, ddr_config;
 	int ret = 0;
 
 	pr_debug("%s: Enter %s\n", mmc_hostname(host->mmc), __func__);
 
 	/*
-	 * Currently the CORE_DDR_CONFIG register defaults to desired
-	 * configuration on reset. Currently reprogramming the power on
-	 * reset (POR) value in case it might have been modified by
-	 * bootloaders. In the future, if this changes, then the desired
-	 * values will need to be programmed appropriately.
+	 * Reprogramming the value in case it might have been modified by
+	 * bootloaders.
 	 */
-	writel_relaxed(DDR_CONFIG_POR_VAL, host->ioaddr + CORE_DDR_CONFIG);
+	if (msm_host->rclk_delay_fix) {
+		writel_relaxed(DDR_CONFIG_2_POR_VAL, host->ioaddr +
+			msm_host_offset->CORE_DDR_CONFIG_2);
+	} else {
+		ddr_config = DDR_CONFIG_POR_VAL &
+				~DDR_CONFIG_PRG_RCLK_DLY_MASK;
+		ddr_config |= DDR_CONFIG_PRG_RCLK_DLY;
+		writel_relaxed(ddr_config, host->ioaddr +
+			msm_host_offset->CORE_DDR_CONFIG);
+	}
+
+	if (msm_host->enhanced_strobe && mmc_card_strobe(msm_host->mmc->card))
+		writel_relaxed((readl_relaxed(host->ioaddr +
+				msm_host_offset->CORE_DDR_200_CFG)
+				| CORE_CMDIN_RCLK_EN), host->ioaddr +
+				msm_host_offset->CORE_DDR_200_CFG);
 
 	/* Write 1 to DDR_CAL_EN field in CORE_DLL_CONFIG_2 */
-	writel_relaxed((readl_relaxed(host->ioaddr + CORE_DLL_CONFIG_2)
+	writel_relaxed((readl_relaxed(host->ioaddr +
+			msm_host_offset->CORE_DLL_CONFIG_2)
 			| CORE_DDR_CAL_EN),
-			host->ioaddr + CORE_DLL_CONFIG_2);
+			host->ioaddr + msm_host_offset->CORE_DLL_CONFIG_2);
 
 	/* Poll on DDR_DLL_LOCK bit in CORE_DLL_STATUS to be set */
-	ret = readl_poll_timeout(host->ioaddr + CORE_DLL_STATUS,
+	ret = readl_poll_timeout(host->ioaddr +
+		 msm_host_offset->CORE_DLL_STATUS,
 		 dll_status, (dll_status & CORE_DDR_DLL_LOCK), 10, 1000);
 
 	if (ret == -ETIMEDOUT) {
@@ -861,10 +1000,19 @@
 		goto out;
 	}
 
-	/* set CORE_PWRSAVE_DLL bit in CORE_VENDOR_SPEC3 */
-	writel_relaxed((readl_relaxed(host->ioaddr + CORE_VENDOR_SPEC3)
-			| CORE_PWRSAVE_DLL),
-			host->ioaddr + CORE_VENDOR_SPEC3);
+	/*
+	 * set CORE_PWRSAVE_DLL bit in CORE_VENDOR_SPEC3.
+	 * when MCLK is gated OFF, it is not gated for less than 0.5us
+	 * and MCLK must be switched on for at-least 1us before DATA
+	 * starts coming. Controllers with 14lpp tech DLL cannot
+	 * guarantee above requirement. So PWRSAVE_DLL should not be
+	 * turned on for host controllers using this DLL.
+	 */
+	if (!msm_host->use_14lpp_dll)
+		writel_relaxed((readl_relaxed(host->ioaddr +
+				msm_host_offset->CORE_VENDOR_SPEC3)
+				| CORE_PWRSAVE_DLL), host->ioaddr +
+				msm_host_offset->CORE_VENDOR_SPEC3);
 	mb();
 out:
 	pr_debug("%s: Exit %s, ret:%d\n", mmc_hostname(host->mmc),
@@ -872,11 +1020,49 @@
 	return ret;
 }
 
+static int sdhci_msm_enhanced_strobe(struct sdhci_host *host)
+{
+	int ret = 0;
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	struct mmc_host *mmc = host->mmc;
+
+	pr_debug("%s: Enter %s\n", mmc_hostname(host->mmc), __func__);
+
+	if (!msm_host->enhanced_strobe || !mmc_card_strobe(mmc->card)) {
+		pr_debug("%s: host/card does not support hs400 enhanced strobe\n",
+				mmc_hostname(mmc));
+		return -EINVAL;
+	}
+
+	if (msm_host->calibration_done ||
+		!(mmc->ios.timing == MMC_TIMING_MMC_HS400)) {
+		return 0;
+	}
+
+	/*
+	 * Reset the tuning block.
+	 */
+	ret = msm_init_cm_dll(host);
+	if (ret)
+		goto out;
+
+	ret = sdhci_msm_cm_dll_sdc4_calibration(host);
+out:
+	if (!ret)
+		msm_host->calibration_done = true;
+	pr_debug("%s: Exit %s, ret:%d\n", mmc_hostname(host->mmc),
+			__func__, ret);
+	return ret;
+}
+
 static int sdhci_msm_hs400_dll_calibration(struct sdhci_host *host)
 {
 	int ret = 0;
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
 	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	const struct sdhci_msm_offset *msm_host_offset =
+					msm_host->offset;
 
 	pr_debug("%s: Enter %s\n", mmc_hostname(host->mmc), __func__);
 
@@ -894,9 +1080,10 @@
 		goto out;
 
 	/* Write 1 to CMD_DAT_TRACK_SEL field in DLL_CONFIG */
-	writel_relaxed((readl_relaxed(host->ioaddr + CORE_DLL_CONFIG)
-				| CORE_CMD_DAT_TRACK_SEL),
-				host->ioaddr + CORE_DLL_CONFIG);
+	writel_relaxed((readl_relaxed(host->ioaddr +
+				msm_host_offset->CORE_DLL_CONFIG)
+				| CORE_CMD_DAT_TRACK_SEL), host->ioaddr +
+				msm_host_offset->CORE_DLL_CONFIG);
 
 	if (msm_host->use_cdclp533)
 		/* Calibrate CDCLP533 DLL HW */
@@ -954,6 +1141,7 @@
 	u8 drv_type = 0;
 	bool drv_type_changed = false;
 	struct mmc_card *card = host->mmc->card;
+	int sts_retry;
 
 	/*
 	 * Tuning is required for SDR104, HS200 and HS400 cards and
@@ -965,6 +1153,13 @@
 		(ios.timing == MMC_TIMING_UHS_SDR104)))
 		return 0;
 
+	/*
+	 * Don't allow re-tuning for CRC errors observed for any commands
+	 * that are sent during tuning sequence itself.
+	 */
+	if (msm_host->tuning_in_progress)
+		return 0;
+	msm_host->tuning_in_progress = true;
 	pr_debug("%s: Enter %s\n", mmc_hostname(mmc), __func__);
 
 	/* CDC/SDC4 DLL HW calibration is only required for HS400 mode*/
@@ -1010,6 +1205,7 @@
 			.data = &data
 		};
 		struct scatterlist sg;
+		struct mmc_command sts_cmd = {0};
 
 		/* set the phase in delay line hw block */
 		rc = msm_config_cm_dll_phase(host, phase);
@@ -1030,6 +1226,35 @@
 		memset(data_buf, 0, size);
 		mmc_wait_for_req(mmc, &mrq);
 
+		if (card && (cmd.error || data.error)) {
+			sts_cmd.opcode = MMC_SEND_STATUS;
+			sts_cmd.arg = card->rca << 16;
+			sts_cmd.flags = MMC_RSP_R1 | MMC_CMD_AC;
+			sts_retry = 5;
+			while (sts_retry) {
+				mmc_wait_for_cmd(mmc, &sts_cmd, 0);
+
+				if (sts_cmd.error ||
+				   (R1_CURRENT_STATE(sts_cmd.resp[0])
+				   != R1_STATE_TRAN)) {
+					sts_retry--;
+					/*
+					 * wait for at least 146 MCLK cycles for
+					 * the card to move to TRANS state. As
+					 * the MCLK would be min 200MHz for
+					 * tuning, we need max 0.73us delay. To
+					 * be on safer side 1ms delay is given.
+					 */
+					usleep_range(1000, 1200);
+					pr_debug("%s: phase %d sts cmd err %d resp 0x%x\n",
+						mmc_hostname(mmc), phase,
+						sts_cmd.error, sts_cmd.resp[0]);
+					continue;
+				}
+				break;
+			};
+		}
+
 		if (!cmd.error && !data.error &&
 			!memcmp(data_buf, tuning_block_pattern, size)) {
 			/* tuning is successful at this tuning point */
@@ -1060,6 +1285,8 @@
 
 		/* set drive type to other value . default setting is 0x0 */
 		while (++drv_type <= MAX_DRV_TYPES_SUPPORTED_HS200) {
+			pr_debug("%s: trying different drive strength (%d)\n",
+				mmc_hostname(mmc), drv_type);
 			if (card->ext_csd.raw_driver_strength &
 					(1 << drv_type)) {
 				sdhci_msm_set_mmc_drv_type(host, opcode,
@@ -1109,6 +1336,7 @@
 	if (!rc)
 		msm_host->tuning_done = true;
 	spin_unlock_irqrestore(&host->lock, flags);
+	msm_host->tuning_in_progress = false;
 	pr_debug("%s: Exit %s, err(%d)\n", mmc_hostname(mmc), __func__, rc);
 	return rc;
 }
@@ -1403,13 +1631,186 @@
 	return ret;
 }
 
+#ifdef CONFIG_SMP
+static inline void parse_affine_irq(struct sdhci_msm_pltfm_data *pdata)
+{
+	pdata->pm_qos_data.irq_req_type = PM_QOS_REQ_AFFINE_IRQ;
+}
+#else
+static inline void parse_affine_irq(struct sdhci_msm_pltfm_data *pdata) { }
+#endif
+
+static int sdhci_msm_pm_qos_parse_irq(struct device *dev,
+		struct sdhci_msm_pltfm_data *pdata)
+{
+	struct device_node *np = dev->of_node;
+	const char *str;
+	u32 cpu;
+	int ret = 0;
+	int i;
+
+	pdata->pm_qos_data.irq_valid = false;
+	pdata->pm_qos_data.irq_req_type = PM_QOS_REQ_AFFINE_CORES;
+	if (!of_property_read_string(np, "qcom,pm-qos-irq-type", &str) &&
+		!strcmp(str, "affine_irq")) {
+		parse_affine_irq(pdata);
+	}
+
+	/* must specify cpu for "affine_cores" type */
+	if (pdata->pm_qos_data.irq_req_type == PM_QOS_REQ_AFFINE_CORES) {
+		pdata->pm_qos_data.irq_cpu = -1;
+		ret = of_property_read_u32(np, "qcom,pm-qos-irq-cpu", &cpu);
+		if (ret) {
+			dev_err(dev, "%s: error %d reading irq cpu\n", __func__,
+				ret);
+			goto out;
+		}
+		if (cpu < 0 || cpu >= num_possible_cpus()) {
+			dev_err(dev, "%s: invalid irq cpu %d (NR_CPUS=%d)\n",
+				__func__, cpu, num_possible_cpus());
+			ret = -EINVAL;
+			goto out;
+		}
+		pdata->pm_qos_data.irq_cpu = cpu;
+	}
+
+	if (of_property_count_u32_elems(np, "qcom,pm-qos-irq-latency") !=
+		SDHCI_POWER_POLICY_NUM) {
+		dev_err(dev, "%s: could not read %d values for 'qcom,pm-qos-irq-latency'\n",
+			__func__, SDHCI_POWER_POLICY_NUM);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	for (i = 0; i < SDHCI_POWER_POLICY_NUM; i++)
+		of_property_read_u32_index(np, "qcom,pm-qos-irq-latency", i,
+			&pdata->pm_qos_data.irq_latency.latency[i]);
+
+	pdata->pm_qos_data.irq_valid = true;
+out:
+	return ret;
+}
+
+static int sdhci_msm_pm_qos_parse_cpu_groups(struct device *dev,
+		struct sdhci_msm_pltfm_data *pdata)
+{
+	struct device_node *np = dev->of_node;
+	u32 mask;
+	int nr_groups;
+	int ret;
+	int i;
+
+	/* Read cpu group mapping */
+	nr_groups = of_property_count_u32_elems(np, "qcom,pm-qos-cpu-groups");
+	if (nr_groups <= 0) {
+		ret = -EINVAL;
+		goto out;
+	}
+	pdata->pm_qos_data.cpu_group_map.nr_groups = nr_groups;
+	pdata->pm_qos_data.cpu_group_map.mask =
+		kcalloc(nr_groups, sizeof(cpumask_t), GFP_KERNEL);
+	if (!pdata->pm_qos_data.cpu_group_map.mask) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	for (i = 0; i < nr_groups; i++) {
+		of_property_read_u32_index(np, "qcom,pm-qos-cpu-groups",
+			i, &mask);
+
+		pdata->pm_qos_data.cpu_group_map.mask[i].bits[0] = mask;
+		if (!cpumask_subset(&pdata->pm_qos_data.cpu_group_map.mask[i],
+			cpu_possible_mask)) {
+			dev_err(dev, "%s: invalid mask 0x%x of cpu group #%d\n",
+				__func__, mask, i);
+			ret = -EINVAL;
+			goto free_res;
+		}
+	}
+	return 0;
+
+free_res:
+	kfree(pdata->pm_qos_data.cpu_group_map.mask);
+out:
+	return ret;
+}
+
+static int sdhci_msm_pm_qos_parse_latency(struct device *dev, const char *name,
+		int nr_groups, struct sdhci_msm_pm_qos_latency **latency)
+{
+	struct device_node *np = dev->of_node;
+	struct sdhci_msm_pm_qos_latency *values;
+	int ret;
+	int i;
+	int group;
+	int cfg;
+
+	ret = of_property_count_u32_elems(np, name);
+	if (ret > 0 && ret != SDHCI_POWER_POLICY_NUM * nr_groups) {
+		dev_err(dev, "%s: invalid number of values for property %s: expected=%d actual=%d\n",
+			__func__, name,	SDHCI_POWER_POLICY_NUM * nr_groups,
+			ret);
+		return -EINVAL;
+	} else if (ret < 0) {
+		return ret;
+	}
+
+	values = kcalloc(nr_groups, sizeof(struct sdhci_msm_pm_qos_latency),
+			GFP_KERNEL);
+	if (!values)
+		return -ENOMEM;
+
+	for (i = 0; i < SDHCI_POWER_POLICY_NUM * nr_groups; i++) {
+		group = i / SDHCI_POWER_POLICY_NUM;
+		cfg = i % SDHCI_POWER_POLICY_NUM;
+		of_property_read_u32_index(np, name, i,
+				&(values[group].latency[cfg]));
+	}
+
+	*latency = values;
+	return 0;
+}
+
+static void sdhci_msm_pm_qos_parse(struct device *dev,
+				struct sdhci_msm_pltfm_data *pdata)
+{
+	if (sdhci_msm_pm_qos_parse_irq(dev, pdata))
+		dev_notice(dev, "%s: PM QoS voting for IRQ will be disabled\n",
+			__func__);
+
+	if (!sdhci_msm_pm_qos_parse_cpu_groups(dev, pdata)) {
+		pdata->pm_qos_data.cmdq_valid =
+			!sdhci_msm_pm_qos_parse_latency(dev,
+				"qcom,pm-qos-cmdq-latency-us",
+				pdata->pm_qos_data.cpu_group_map.nr_groups,
+				&pdata->pm_qos_data.cmdq_latency);
+		pdata->pm_qos_data.legacy_valid =
+			!sdhci_msm_pm_qos_parse_latency(dev,
+				"qcom,pm-qos-legacy-latency-us",
+				pdata->pm_qos_data.cpu_group_map.nr_groups,
+				&pdata->pm_qos_data.latency);
+		if (!pdata->pm_qos_data.cmdq_valid &&
+			!pdata->pm_qos_data.legacy_valid) {
+			/* clean-up previously allocated arrays */
+			kfree(pdata->pm_qos_data.latency);
+			kfree(pdata->pm_qos_data.cmdq_latency);
+			dev_err(dev, "%s: invalid PM QoS latency values. Voting for cpu group will be disabled\n",
+				__func__);
+		}
+	} else {
+		dev_notice(dev, "%s: PM QoS voting for cpu group will be disabled\n",
+			__func__);
+	}
+}
+
 /* Parse platform data */
-static struct sdhci_msm_pltfm_data *sdhci_msm_populate_pdata(struct device *dev)
+static
+struct sdhci_msm_pltfm_data *sdhci_msm_populate_pdata(struct device *dev,
+						struct sdhci_msm_host *msm_host)
 {
 	struct sdhci_msm_pltfm_data *pdata = NULL;
 	struct device_node *np = dev->of_node;
 	u32 bus_width = 0;
-	u32 cpu_dma_latency;
 	int len, i;
 	int clk_table_len;
 	u32 *clk_table = NULL;
@@ -1435,11 +1836,15 @@
 		pdata->mmc_bus_width = 0;
 	}
 
-	if (!of_property_read_u32(np, "qcom,cpu-dma-latency-us",
-				&cpu_dma_latency))
-		pdata->cpu_dma_latency_us = cpu_dma_latency;
-	else
-		pdata->cpu_dma_latency_us = MSM_MMC_DEFAULT_CPU_DMA_LATENCY;
+	if (sdhci_msm_dt_get_array(dev, "qcom,devfreq,freq-table",
+			&msm_host->mmc->clk_scaling.freq_table,
+			&msm_host->mmc->clk_scaling.freq_table_sz, 0))
+		pr_debug("%s: no clock scaling frequencies were supplied\n",
+			dev_name(dev));
+	else if (!msm_host->mmc->clk_scaling.freq_table ||
+			!msm_host->mmc->clk_scaling.freq_table_sz)
+			dev_err(dev, "bad dts clock scaling frequencies\n");
+
 	if (sdhci_msm_dt_get_array(dev, "qcom,clk-rates",
 			&clk_table, &clk_table_len, 0)) {
 		dev_err(dev, "failed parsing supported clock rates\n");
@@ -1506,6 +1911,20 @@
 	if (of_get_property(np, "qcom,nonremovable", NULL))
 		pdata->nonremovable = true;
 
+	if (of_get_property(np, "qcom,nonhotplug", NULL))
+		pdata->nonhotplug = true;
+
+	pdata->largeaddressbus =
+		of_property_read_bool(np, "qcom,large-address-bus");
+
+	if (of_property_read_bool(np, "qcom,wakeup-on-idle"))
+		msm_host->mmc->wakeup_on_idle = true;
+
+	sdhci_msm_pm_qos_parse(dev, pdata);
+
+	if (of_get_property(np, "qcom,core_3_0v_support", NULL))
+		pdata->core_3_0v_support = true;
+
 	return pdata;
 out:
 	return NULL;
@@ -2010,23 +2429,84 @@
 	return ret;
 }
 
+/*
+ * Acquire spin-lock host->lock before calling this function
+ */
+static void sdhci_msm_cfg_sdiowakeup_gpio_irq(struct sdhci_host *host,
+					      bool enable)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+
+	if (enable && !msm_host->is_sdiowakeup_enabled)
+		enable_irq(msm_host->pdata->sdiowakeup_irq);
+	else if (!enable && msm_host->is_sdiowakeup_enabled)
+		disable_irq_nosync(msm_host->pdata->sdiowakeup_irq);
+	else
+		dev_warn(&msm_host->pdev->dev, "%s: wakeup to config: %d curr: %d\n",
+			__func__, enable, msm_host->is_sdiowakeup_enabled);
+	msm_host->is_sdiowakeup_enabled = enable;
+}
+
+static irqreturn_t sdhci_msm_sdiowakeup_irq(int irq, void *data)
+{
+	struct sdhci_host *host = (struct sdhci_host *)data;
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+
+	unsigned long flags;
+
+	pr_debug("%s: irq (%d) received\n", __func__, irq);
+
+	spin_lock_irqsave(&host->lock, flags);
+	sdhci_msm_cfg_sdiowakeup_gpio_irq(host, false);
+	spin_unlock_irqrestore(&host->lock, flags);
+	msm_host->sdio_pending_processing = true;
+
+	return IRQ_HANDLED;
+}
+
+void sdhci_msm_dump_pwr_ctrl_regs(struct sdhci_host *host)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	const struct sdhci_msm_offset *msm_host_offset =
+					msm_host->offset;
+
+	pr_err("%s: PWRCTL_STATUS: 0x%08x | PWRCTL_MASK: 0x%08x | PWRCTL_CTL: 0x%08x\n",
+		mmc_hostname(host->mmc),
+		sdhci_msm_readl_relaxed(host,
+			msm_host_offset->CORE_PWRCTL_STATUS),
+		sdhci_msm_readl_relaxed(host,
+			msm_host_offset->CORE_PWRCTL_MASK),
+		sdhci_msm_readl_relaxed(host,
+			msm_host_offset->CORE_PWRCTL_CTL));
+}
+
 static irqreturn_t sdhci_msm_pwr_irq(int irq, void *data)
 {
 	struct sdhci_host *host = (struct sdhci_host *)data;
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
 	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	const struct sdhci_msm_offset *msm_host_offset =
+					msm_host->offset;
 	u8 irq_status = 0;
 	u8 irq_ack = 0;
 	int ret = 0;
 	int pwr_state = 0, io_level = 0;
 	unsigned long flags;
+	int retry = 10;
 
-	irq_status = readb_relaxed(msm_host->core_mem + CORE_PWRCTL_STATUS);
+	irq_status = sdhci_msm_readb_relaxed(host,
+		msm_host_offset->CORE_PWRCTL_STATUS);
+
 	pr_debug("%s: Received IRQ(%d), status=0x%x\n",
 		mmc_hostname(msm_host->mmc), irq, irq_status);
 
 	/* Clear the interrupt */
-	writeb_relaxed(irq_status, (msm_host->core_mem + CORE_PWRCTL_CLEAR));
+	sdhci_msm_writeb_relaxed(irq_status, host,
+		msm_host_offset->CORE_PWRCTL_CLEAR);
+
 	/*
 	 * SDHC has core_mem and hc_mem device memory and these memory
 	 * addresses do not fall within 1KB region. Hence, any update to
@@ -2034,6 +2514,29 @@
 	 * completed before its next update to registers within hc_mem.
 	 */
 	mb();
+	/*
+	 * There is a rare HW scenario where the first clear pulse could be
+	 * lost when actual reset and clear/read of status register is
+	 * happening at a time. Hence, retry for at least 10 times to make
+	 * sure status register is cleared. Otherwise, this will result in
+	 * a spurious power IRQ resulting in system instability.
+	 */
+	while (irq_status & sdhci_msm_readb_relaxed(host,
+		msm_host_offset->CORE_PWRCTL_STATUS)) {
+		if (retry == 0) {
+			pr_err("%s: Timedout clearing (0x%x) pwrctl status register\n",
+				mmc_hostname(host->mmc), irq_status);
+			sdhci_msm_dump_pwr_ctrl_regs(host);
+			BUG_ON(1);
+		}
+		sdhci_msm_writeb_relaxed(irq_status, host,
+			msm_host_offset->CORE_PWRCTL_CLEAR);
+		retry--;
+		udelay(10);
+	}
+	if (likely(retry < 10))
+		pr_debug("%s: success clearing (0x%x) pwrctl status register, retries left %d\n",
+				mmc_hostname(host->mmc), irq_status, retry);
 
 	/* Handle BUS ON/OFF*/
 	if (irq_status & CORE_PWRCTL_BUS_ON) {
@@ -2089,7 +2592,8 @@
 	}
 
 	/* ACK status to the core */
-	writeb_relaxed(irq_ack, (msm_host->core_mem + CORE_PWRCTL_CTL));
+	sdhci_msm_writeb_relaxed(irq_ack, host,
+			msm_host_offset->CORE_PWRCTL_CTL);
 	/*
 	 * SDHC has core_mem and hc_mem device memory and these memory
 	 * addresses do not fall within 1KB region. Hence, any update to
@@ -2098,14 +2602,17 @@
 	 */
 	mb();
 
-	if (io_level & REQ_IO_HIGH)
-		writel_relaxed((readl_relaxed(host->ioaddr + CORE_VENDOR_SPEC) &
-				~CORE_IO_PAD_PWR_SWITCH),
-				host->ioaddr + CORE_VENDOR_SPEC);
-	else if (io_level & REQ_IO_LOW)
-		writel_relaxed((readl_relaxed(host->ioaddr + CORE_VENDOR_SPEC) |
-				CORE_IO_PAD_PWR_SWITCH),
-				host->ioaddr + CORE_VENDOR_SPEC);
+	if ((io_level & REQ_IO_HIGH) && (msm_host->caps_0 & CORE_3_0V_SUPPORT))
+		writel_relaxed((readl_relaxed(host->ioaddr +
+				msm_host_offset->CORE_VENDOR_SPEC) &
+				~CORE_IO_PAD_PWR_SWITCH), host->ioaddr +
+				msm_host_offset->CORE_VENDOR_SPEC);
+	else if ((io_level & REQ_IO_LOW) ||
+			(msm_host->caps_0 & CORE_1_8V_SUPPORT))
+		writel_relaxed((readl_relaxed(host->ioaddr +
+				msm_host_offset->CORE_VENDOR_SPEC) |
+				CORE_IO_PAD_PWR_SWITCH), host->ioaddr +
+				msm_host_offset->CORE_VENDOR_SPEC);
 	mb();
 
 	pr_debug("%s: Handled IRQ(%d), ret=%d, ack=0x%x\n",
@@ -2190,6 +2697,8 @@
 {
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
 	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	const struct sdhci_msm_offset *msm_host_offset =
+					msm_host->offset;
 	unsigned long flags;
 	bool done = false;
 	u32 io_sig_sts;
@@ -2198,7 +2707,9 @@
 	pr_debug("%s: %s: request %d curr_pwr_state %x curr_io_level %x\n",
 			mmc_hostname(host->mmc), __func__, req_type,
 			msm_host->curr_pwr_state, msm_host->curr_io_level);
-	io_sig_sts = readl_relaxed(msm_host->core_mem + CORE_GENERICS);
+	io_sig_sts = sdhci_msm_readl_relaxed(host,
+			msm_host_offset->CORE_GENERICS);
+
 	/*
 	 * The IRQ for request type IO High/Low will be generated when -
 	 * 1. SWITCHABLE_SIGNALLING_VOL is enabled in HW.
@@ -2236,8 +2747,10 @@
 	 */
 	if (done)
 		init_completion(&msm_host->pwr_irq_completion);
-	else
-		wait_for_completion(&msm_host->pwr_irq_completion);
+	else if (!wait_for_completion_timeout(&msm_host->pwr_irq_completion,
+				msecs_to_jiffies(MSM_PWR_IRQ_TIMEOUT_MS)))
+		__WARN_printf("%s: request(%d) timed out waiting for pwr_irq\n",
+					mmc_hostname(host->mmc), req_type);
 
 	pr_debug("%s: %s: request %d done\n", mmc_hostname(host->mmc),
 			__func__, req_type);
@@ -2245,14 +2758,24 @@
 
 static void sdhci_msm_toggle_cdr(struct sdhci_host *host, bool enable)
 {
-	if (enable)
-		writel_relaxed((readl_relaxed(host->ioaddr +
-					      CORE_DLL_CONFIG) | CORE_CDR_EN),
-			       host->ioaddr + CORE_DLL_CONFIG);
-	else
-		writel_relaxed((readl_relaxed(host->ioaddr +
-					      CORE_DLL_CONFIG) & ~CORE_CDR_EN),
-			       host->ioaddr + CORE_DLL_CONFIG);
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	const struct sdhci_msm_offset *msm_host_offset =
+					msm_host->offset;
+	u32 config = readl_relaxed(host->ioaddr +
+		msm_host_offset->CORE_DLL_CONFIG);
+
+	if (enable) {
+		config |= CORE_CDR_EN;
+		config &= ~CORE_CDR_EXT_EN;
+		writel_relaxed(config, host->ioaddr +
+			msm_host_offset->CORE_DLL_CONFIG);
+	} else {
+		config &= ~CORE_CDR_EN;
+		config |= CORE_CDR_EXT_EN;
+		writel_relaxed(config, host->ioaddr +
+			msm_host_offset->CORE_DLL_CONFIG);
+	}
 }
 
 static unsigned int sdhci_msm_max_segs(void)
@@ -2345,7 +2868,24 @@
 	return rc;
 }
 
+static void sdhci_msm_disable_controller_clock(struct sdhci_host *host)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
 
+	if (atomic_read(&msm_host->controller_clock)) {
+		if (!IS_ERR(msm_host->clk))
+			clk_disable_unprepare(msm_host->clk);
+		if (!IS_ERR(msm_host->pclk))
+			clk_disable_unprepare(msm_host->pclk);
+		if (!IS_ERR(msm_host->ice_clk))
+			clk_disable_unprepare(msm_host->ice_clk);
+		sdhci_msm_bus_voting(host, 0);
+		atomic_set(&msm_host->controller_clock, 0);
+		pr_debug("%s: %s: disabled controller clock\n",
+			mmc_hostname(host->mmc), __func__);
+	}
+}
 
 static int sdhci_msm_prepare_clocks(struct sdhci_host *host, bool enable)
 {
@@ -2447,6 +2987,9 @@
 	int rc;
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
 	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	const struct sdhci_msm_offset *msm_host_offset =
+					msm_host->offset;
+	struct mmc_card *card = host->mmc->card;
 	struct mmc_ios	curr_ios = host->mmc->ios;
 	u32 sup_clock, ddr_clock, dll_lock;
 	bool curr_pwrsave;
@@ -2456,8 +2999,10 @@
 		 * disable pwrsave to ensure clock is not auto-gated until
 		 * the rate is >400KHz (initialization complete).
 		 */
-		writel_relaxed(readl_relaxed(host->ioaddr + CORE_VENDOR_SPEC) &
-			~CORE_CLK_PWRSAVE, host->ioaddr + CORE_VENDOR_SPEC);
+		writel_relaxed(readl_relaxed(host->ioaddr +
+			msm_host_offset->CORE_VENDOR_SPEC) &
+			~CORE_CLK_PWRSAVE, host->ioaddr +
+			msm_host_offset->CORE_VENDOR_SPEC);
 		sdhci_msm_prepare_clocks(host, false);
 		host->clock = clock;
 		goto out;
@@ -2467,24 +3012,27 @@
 	if (rc)
 		goto out;
 
-	curr_pwrsave = !!(readl_relaxed(host->ioaddr + CORE_VENDOR_SPEC) &
-			  CORE_CLK_PWRSAVE);
+	curr_pwrsave = !!(readl_relaxed(host->ioaddr +
+	msm_host_offset->CORE_VENDOR_SPEC) & CORE_CLK_PWRSAVE);
 	if ((clock > 400000) &&
-	    !curr_pwrsave && mmc_host_may_gate_card(host->mmc->card))
-		writel_relaxed(readl_relaxed(host->ioaddr + CORE_VENDOR_SPEC)
-				| CORE_CLK_PWRSAVE,
-				host->ioaddr + CORE_VENDOR_SPEC);
+	    !curr_pwrsave && card && mmc_host_may_gate_card(card))
+		writel_relaxed(readl_relaxed(host->ioaddr +
+				msm_host_offset->CORE_VENDOR_SPEC)
+				| CORE_CLK_PWRSAVE, host->ioaddr +
+				msm_host_offset->CORE_VENDOR_SPEC);
 	/*
 	 * Disable pwrsave for a newly added card if doesn't allow clock
 	 * gating.
 	 */
-	else if (curr_pwrsave && !mmc_host_may_gate_card(host->mmc->card))
-		writel_relaxed(readl_relaxed(host->ioaddr + CORE_VENDOR_SPEC)
-				& ~CORE_CLK_PWRSAVE,
-				host->ioaddr + CORE_VENDOR_SPEC);
+	else if (curr_pwrsave && card && !mmc_host_may_gate_card(card))
+		writel_relaxed(readl_relaxed(host->ioaddr +
+				msm_host_offset->CORE_VENDOR_SPEC)
+				& ~CORE_CLK_PWRSAVE, host->ioaddr +
+				msm_host_offset->CORE_VENDOR_SPEC);
 
 	sup_clock = sdhci_msm_get_sup_clk_rate(host, clock);
 	if ((curr_ios.timing == MMC_TIMING_UHS_DDR50) ||
+		(curr_ios.timing == MMC_TIMING_MMC_DDR52) ||
 		(curr_ios.timing == MMC_TIMING_MMC_HS400)) {
 		/*
 		 * The SDHC requires internal clock frequency to be double the
@@ -2516,24 +3064,28 @@
 	 */
 	if (curr_ios.timing == MMC_TIMING_MMC_HS400) {
 		/* Select the divided clock (free running MCLK/2) */
-		writel_relaxed(((readl_relaxed(host->ioaddr + CORE_VENDOR_SPEC)
-					& ~CORE_HC_MCLK_SEL_MASK)
-					| CORE_HC_MCLK_SEL_HS400),
-					host->ioaddr + CORE_VENDOR_SPEC);
+		writel_relaxed(((readl_relaxed(host->ioaddr +
+				msm_host_offset->CORE_VENDOR_SPEC)
+				& ~CORE_HC_MCLK_SEL_MASK)
+				| CORE_HC_MCLK_SEL_HS400), host->ioaddr +
+				msm_host_offset->CORE_VENDOR_SPEC);
 		/*
 		 * Select HS400 mode using the HC_SELECT_IN from VENDOR SPEC
 		 * register
 		 */
-		if (msm_host->tuning_done && !msm_host->calibration_done) {
+		if ((msm_host->tuning_done ||
+				(card && mmc_card_strobe(card) &&
+				 msm_host->enhanced_strobe)) &&
+				!msm_host->calibration_done) {
 			/*
 			 * Write 0x6 to HC_SELECT_IN and 1 to HC_SELECT_IN_EN
 			 * field in VENDOR_SPEC_FUNC
 			 */
 			writel_relaxed((readl_relaxed(host->ioaddr + \
-					CORE_VENDOR_SPEC)
+					msm_host_offset->CORE_VENDOR_SPEC)
 					| CORE_HC_SELECT_IN_HS400
-					| CORE_HC_SELECT_IN_EN),
-					host->ioaddr + CORE_VENDOR_SPEC);
+					| CORE_HC_SELECT_IN_EN), host->ioaddr +
+					msm_host_offset->CORE_VENDOR_SPEC);
 		}
 		if (!host->mmc->ios.old_rate && !msm_host->use_cdclp533) {
 			/*
@@ -2541,7 +3093,8 @@
 			 * CORE_DLL_STATUS to be set.  This should get set
 			 * with in 15 us at 200 MHz.
 			 */
-			rc = readl_poll_timeout(host->ioaddr + CORE_DLL_STATUS,
+			rc = readl_poll_timeout(host->ioaddr +
+					msm_host_offset->CORE_DLL_STATUS,
 					dll_lock, (dll_lock & (CORE_DLL_LOCK |
 					CORE_DDR_DLL_LOCK)), 10, 1000);
 			if (rc == -ETIMEDOUT)
@@ -2553,14 +3106,16 @@
 		if (!msm_host->use_cdclp533)
 			/* set CORE_PWRSAVE_DLL bit in CORE_VENDOR_SPEC3 */
 			writel_relaxed((readl_relaxed(host->ioaddr +
-					CORE_VENDOR_SPEC3) & ~CORE_PWRSAVE_DLL),
-					host->ioaddr + CORE_VENDOR_SPEC3);
+					msm_host_offset->CORE_VENDOR_SPEC3)
+					& ~CORE_PWRSAVE_DLL), host->ioaddr +
+					msm_host_offset->CORE_VENDOR_SPEC3);
 
 		/* Select the default clock (free running MCLK) */
-		writel_relaxed(((readl_relaxed(host->ioaddr + CORE_VENDOR_SPEC)
+		writel_relaxed(((readl_relaxed(host->ioaddr +
+					msm_host_offset->CORE_VENDOR_SPEC)
 					& ~CORE_HC_MCLK_SEL_MASK)
-					| CORE_HC_MCLK_SEL_DFLT),
-					host->ioaddr + CORE_VENDOR_SPEC);
+					| CORE_HC_MCLK_SEL_DFLT), host->ioaddr +
+					msm_host_offset->CORE_VENDOR_SPEC);
 
 		/*
 		 * Disable HC_SELECT_IN to be able to use the UHS mode select
@@ -2570,10 +3125,11 @@
 		 * Write 0 to HC_SELECT_IN and HC_SELECT_IN_EN field
 		 * in VENDOR_SPEC_FUNC
 		 */
-		writel_relaxed((readl_relaxed(host->ioaddr + CORE_VENDOR_SPEC)
+		writel_relaxed((readl_relaxed(host->ioaddr +
+				msm_host_offset->CORE_VENDOR_SPEC)
 				& ~CORE_HC_SELECT_IN_EN
-				& ~CORE_HC_SELECT_IN_MASK),
-				host->ioaddr + CORE_VENDOR_SPEC);
+				& ~CORE_HC_SELECT_IN_MASK), host->ioaddr +
+				msm_host_offset->CORE_VENDOR_SPEC);
 	}
 	mb();
 
@@ -2604,14 +3160,16 @@
 {
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
 	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	const struct sdhci_msm_offset *msm_host_offset =
+					msm_host->offset;
 	u16 ctrl_2;
 
 	ctrl_2 = sdhci_readw(host, SDHCI_HOST_CONTROL2);
 	/* Select Bus Speed Mode for host */
 	ctrl_2 &= ~SDHCI_CTRL_UHS_MASK;
-	if (uhs == MMC_TIMING_MMC_HS400)
-		ctrl_2 |= SDHCI_CTRL_UHS_SDR104;
-	else if (uhs == MMC_TIMING_MMC_HS200)
+	if ((uhs == MMC_TIMING_MMC_HS400) ||
+		(uhs == MMC_TIMING_MMC_HS200) ||
+		(uhs == MMC_TIMING_UHS_SDR104))
 		ctrl_2 |= SDHCI_CTRL_UHS_SDR104;
 	else if (uhs == MMC_TIMING_UHS_SDR12)
 		ctrl_2 |= SDHCI_CTRL_UHS_SDR12;
@@ -2619,9 +3177,8 @@
 		ctrl_2 |= SDHCI_CTRL_UHS_SDR25;
 	else if (uhs == MMC_TIMING_UHS_SDR50)
 		ctrl_2 |= SDHCI_CTRL_UHS_SDR50;
-	else if (uhs == MMC_TIMING_UHS_SDR104)
-		ctrl_2 |= SDHCI_CTRL_UHS_SDR104;
-	else if (uhs == MMC_TIMING_UHS_DDR50)
+	else if ((uhs == MMC_TIMING_UHS_DDR50) ||
+		 (uhs == MMC_TIMING_MMC_DDR52))
 		ctrl_2 |= SDHCI_CTRL_UHS_DDR50;
 	/*
 	 * When clock frquency is less than 100MHz, the feedback clock must be
@@ -2640,14 +3197,16 @@
 		 *
 		 * Write 1 to DLL_RST bit of DLL_CONFIG register
 		 */
-		writel_relaxed((readl_relaxed(host->ioaddr + CORE_DLL_CONFIG)
-				| CORE_DLL_RST),
-				host->ioaddr + CORE_DLL_CONFIG);
+		writel_relaxed((readl_relaxed(host->ioaddr +
+				msm_host_offset->CORE_DLL_CONFIG)
+				| CORE_DLL_RST), host->ioaddr +
+				msm_host_offset->CORE_DLL_CONFIG);
 
 		/* Write 1 to DLL_PDN bit of DLL_CONFIG register */
-		writel_relaxed((readl_relaxed(host->ioaddr + CORE_DLL_CONFIG)
-				| CORE_DLL_PDN),
-				host->ioaddr + CORE_DLL_CONFIG);
+		writel_relaxed((readl_relaxed(host->ioaddr +
+				msm_host_offset->CORE_DLL_CONFIG)
+				| CORE_DLL_PDN), host->ioaddr +
+				msm_host_offset->CORE_DLL_CONFIG);
 		mb();
 
 		/*
@@ -2663,30 +3222,80 @@
 
 }
 
-#define MAX_TEST_BUS 20
+#define MAX_TEST_BUS 60
+#define DRV_NAME "cmdq-host"
+static void sdhci_msm_cmdq_dump_debug_ram(struct sdhci_host *host)
+{
+	int i = 0;
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	const struct sdhci_msm_offset *msm_host_offset =
+					msm_host->offset;
+	struct cmdq_host *cq_host = host->cq_host;
+
+	u32 version = sdhci_msm_readl_relaxed(host,
+		msm_host_offset->CORE_MCI_VERSION);
+	u16 minor = version & CORE_VERSION_TARGET_MASK;
+	/* registers offset changed starting from 4.2.0 */
+	int offset = minor >= SDHCI_MSM_VER_420 ? 0 : 0x48;
+
+	pr_err("---- Debug RAM dump ----\n");
+	pr_err(DRV_NAME ": Debug RAM wrap-around: 0x%08x | Debug RAM overlap: 0x%08x\n",
+	       cmdq_readl(cq_host, CQ_CMD_DBG_RAM_WA + offset),
+	       cmdq_readl(cq_host, CQ_CMD_DBG_RAM_OL + offset));
+
+	while (i < 16) {
+		pr_err(DRV_NAME ": Debug RAM dump [%d]: 0x%08x\n", i,
+		       cmdq_readl(cq_host, CQ_CMD_DBG_RAM + offset + (4 * i)));
+		i++;
+	}
+	pr_err("-------------------------\n");
+}
 
 void sdhci_msm_dump_vendor_regs(struct sdhci_host *host)
 {
 	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
 	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	const struct sdhci_msm_offset *msm_host_offset =
+					msm_host->offset;
 	int tbsel, tbsel2;
 	int i, index = 0;
 	u32 test_bus_val = 0;
 	u32 debug_reg[MAX_TEST_BUS] = {0};
 
 	pr_info("----------- VENDOR REGISTER DUMP -----------\n");
+	if (host->cq_host)
+		sdhci_msm_cmdq_dump_debug_ram(host);
+
+	MMC_TRACE(host->mmc, "Data cnt: 0x%08x | Fifo cnt: 0x%08x\n",
+		sdhci_msm_readl_relaxed(host,
+			msm_host_offset->CORE_MCI_DATA_CNT),
+		sdhci_msm_readl_relaxed(host,
+			msm_host_offset->CORE_MCI_FIFO_CNT));
 	pr_info("Data cnt: 0x%08x | Fifo cnt: 0x%08x | Int sts: 0x%08x\n",
-		readl_relaxed(msm_host->core_mem + CORE_MCI_DATA_CNT),
-		readl_relaxed(msm_host->core_mem + CORE_MCI_FIFO_CNT),
-		readl_relaxed(msm_host->core_mem + CORE_MCI_STATUS));
+		sdhci_msm_readl_relaxed(host,
+			msm_host_offset->CORE_MCI_DATA_CNT),
+		sdhci_msm_readl_relaxed(host,
+			msm_host_offset->CORE_MCI_FIFO_CNT),
+		sdhci_msm_readl_relaxed(host,
+			msm_host_offset->CORE_MCI_STATUS));
 	pr_info("DLL cfg:  0x%08x | DLL sts:  0x%08x | SDCC ver: 0x%08x\n",
-		readl_relaxed(host->ioaddr + CORE_DLL_CONFIG),
-		readl_relaxed(host->ioaddr + CORE_DLL_STATUS),
-		readl_relaxed(msm_host->core_mem + CORE_MCI_VERSION));
+		readl_relaxed(host->ioaddr +
+			msm_host_offset->CORE_DLL_CONFIG),
+		readl_relaxed(host->ioaddr +
+			msm_host_offset->CORE_DLL_STATUS),
+		sdhci_msm_readl_relaxed(host,
+			msm_host_offset->CORE_MCI_VERSION));
 	pr_info("Vndr func: 0x%08x | Vndr adma err : addr0: 0x%08x addr1: 0x%08x\n",
-		readl_relaxed(host->ioaddr + CORE_VENDOR_SPEC),
-		readl_relaxed(host->ioaddr + CORE_VENDOR_SPEC_ADMA_ERR_ADDR0),
-		readl_relaxed(host->ioaddr + CORE_VENDOR_SPEC_ADMA_ERR_ADDR1));
+		readl_relaxed(host->ioaddr +
+			msm_host_offset->CORE_VENDOR_SPEC),
+		readl_relaxed(host->ioaddr +
+			msm_host_offset->CORE_VENDOR_SPEC_ADMA_ERR_ADDR0),
+		readl_relaxed(host->ioaddr +
+			msm_host_offset->CORE_VENDOR_SPEC_ADMA_ERR_ADDR1));
+	pr_info("Vndr func2: 0x%08x\n",
+		readl_relaxed(host->ioaddr +
+			msm_host_offset->CORE_VENDOR_SPEC_FUNC2));
 
 	/*
 	 * tbsel indicates [2:0] bits and tbsel2 indicates [7:4] bits
@@ -2697,31 +3306,609 @@
 	 * to select test bus 14, write 0x1E to CORE_TESTBUS_CONFIG register
 	 * i.e., tbsel2[7:4] = 0001, tbsel[2:0] = 110.
 	 */
-	for (tbsel2 = 0; tbsel2 < 3; tbsel2++) {
+	for (tbsel2 = 0; tbsel2 < 7; tbsel2++) {
 		for (tbsel = 0; tbsel < 8; tbsel++) {
 			if (index >= MAX_TEST_BUS)
 				break;
-			test_bus_val = (tbsel2 << CORE_TESTBUS_SEL2_BIT) |
-					tbsel | CORE_TESTBUS_ENA;
-			writel_relaxed(test_bus_val,
-				msm_host->core_mem + CORE_TESTBUS_CONFIG);
-			debug_reg[index++] = readl_relaxed(msm_host->core_mem +
-							CORE_SDCC_DEBUG_REG);
+			test_bus_val =
+			(tbsel2 << msm_host_offset->CORE_TESTBUS_SEL2_BIT) |
+				tbsel | msm_host_offset->CORE_TESTBUS_ENA;
+			sdhci_msm_writel_relaxed(test_bus_val, host,
+				msm_host_offset->CORE_TESTBUS_CONFIG);
+			debug_reg[index++] = sdhci_msm_readl_relaxed(host,
+				msm_host_offset->CORE_SDCC_DEBUG_REG);
 		}
 	}
 	for (i = 0; i < MAX_TEST_BUS; i = i + 4)
 		pr_info(" Test bus[%d to %d]: 0x%08x 0x%08x 0x%08x 0x%08x\n",
 				i, i + 3, debug_reg[i], debug_reg[i+1],
 				debug_reg[i+2], debug_reg[i+3]);
-	/* Disable test bus */
-	writel_relaxed(~CORE_TESTBUS_ENA, msm_host->core_mem +
-			CORE_TESTBUS_CONFIG);
+}
+
+/*
+ * sdhci_msm_enhanced_strobe_mask :-
+ * Before running CMDQ transfers in HS400 Enhanced Strobe mode,
+ * SW should write 3 to
+ * HC_VENDOR_SPECIFIC_FUNC3.CMDEN_HS400_INPUT_MASK_CNT register.
+ * The default reset value of this register is 2.
+ */
+static void sdhci_msm_enhanced_strobe_mask(struct sdhci_host *host, bool set)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	const struct sdhci_msm_offset *msm_host_offset =
+					msm_host->offset;
+
+	if (!msm_host->enhanced_strobe ||
+			!mmc_card_strobe(msm_host->mmc->card)) {
+		pr_debug("%s: host/card does not support hs400 enhanced strobe\n",
+				mmc_hostname(host->mmc));
+		return;
+	}
+
+	if (set) {
+		writel_relaxed((readl_relaxed(host->ioaddr +
+			msm_host_offset->CORE_VENDOR_SPEC3)
+			| CORE_CMDEN_HS400_INPUT_MASK_CNT),
+			host->ioaddr + msm_host_offset->CORE_VENDOR_SPEC3);
+	} else {
+		writel_relaxed((readl_relaxed(host->ioaddr +
+			msm_host_offset->CORE_VENDOR_SPEC3)
+			& ~CORE_CMDEN_HS400_INPUT_MASK_CNT),
+			host->ioaddr + msm_host_offset->CORE_VENDOR_SPEC3);
+	}
+}
+
+static void sdhci_msm_clear_set_dumpregs(struct sdhci_host *host, bool set)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	const struct sdhci_msm_offset *msm_host_offset =
+					msm_host->offset;
+
+	if (set) {
+		sdhci_msm_writel_relaxed(msm_host_offset->CORE_TESTBUS_ENA,
+			host, msm_host_offset->CORE_TESTBUS_CONFIG);
+	} else {
+		u32 value;
+
+		value = sdhci_msm_readl_relaxed(host,
+			msm_host_offset->CORE_TESTBUS_CONFIG);
+		value &= ~(msm_host_offset->CORE_TESTBUS_ENA);
+		sdhci_msm_writel_relaxed(value, host,
+			msm_host_offset->CORE_TESTBUS_CONFIG);
+	}
+}
+
+void sdhci_msm_reset_workaround(struct sdhci_host *host, u32 enable)
+{
+	u32 vendor_func2;
+	unsigned long timeout;
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	const struct sdhci_msm_offset *msm_host_offset =
+					msm_host->offset;
+
+	vendor_func2 = readl_relaxed(host->ioaddr +
+		msm_host_offset->CORE_VENDOR_SPEC_FUNC2);
+
+	if (enable) {
+		writel_relaxed(vendor_func2 | HC_SW_RST_REQ, host->ioaddr +
+				msm_host_offset->CORE_VENDOR_SPEC_FUNC2);
+		timeout = 10000;
+		while (readl_relaxed(host->ioaddr +
+		msm_host_offset->CORE_VENDOR_SPEC_FUNC2) & HC_SW_RST_REQ) {
+			if (timeout == 0) {
+				pr_info("%s: Applying wait idle disable workaround\n",
+					mmc_hostname(host->mmc));
+				/*
+				 * Apply the reset workaround to not wait for
+				 * pending data transfers on AXI before
+				 * resetting the controller. This could be
+				 * risky if the transfers were stuck on the
+				 * AXI bus.
+				 */
+				vendor_func2 = readl_relaxed(host->ioaddr +
+				msm_host_offset->CORE_VENDOR_SPEC_FUNC2);
+				writel_relaxed(vendor_func2 |
+				HC_SW_RST_WAIT_IDLE_DIS, host->ioaddr +
+				msm_host_offset->CORE_VENDOR_SPEC_FUNC2);
+				host->reset_wa_t = ktime_get();
+				return;
+			}
+			timeout--;
+			udelay(10);
+		}
+		pr_info("%s: waiting for SW_RST_REQ is successful\n",
+				mmc_hostname(host->mmc));
+	} else {
+		writel_relaxed(vendor_func2 & ~HC_SW_RST_WAIT_IDLE_DIS,
+			host->ioaddr + msm_host_offset->CORE_VENDOR_SPEC_FUNC2);
+	}
+}
+
+static void sdhci_msm_pm_qos_irq_unvote_work(struct work_struct *work)
+{
+	struct sdhci_msm_pm_qos_irq *pm_qos_irq =
+		container_of(work, struct sdhci_msm_pm_qos_irq,
+			     unvote_work.work);
+
+	if (atomic_read(&pm_qos_irq->counter))
+		return;
+
+	pm_qos_irq->latency = PM_QOS_DEFAULT_VALUE;
+	pm_qos_update_request(&pm_qos_irq->req, pm_qos_irq->latency);
+}
+
+void sdhci_msm_pm_qos_irq_vote(struct sdhci_host *host)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	struct sdhci_msm_pm_qos_latency *latency =
+		&msm_host->pdata->pm_qos_data.irq_latency;
+	int counter;
+
+	if (!msm_host->pm_qos_irq.enabled)
+		return;
+
+	counter = atomic_inc_return(&msm_host->pm_qos_irq.counter);
+	/* Make sure to update the voting in case power policy has changed */
+	if (msm_host->pm_qos_irq.latency == latency->latency[host->power_policy]
+		&& counter > 1)
+		return;
+
+	cancel_delayed_work_sync(&msm_host->pm_qos_irq.unvote_work);
+	msm_host->pm_qos_irq.latency = latency->latency[host->power_policy];
+	pm_qos_update_request(&msm_host->pm_qos_irq.req,
+				msm_host->pm_qos_irq.latency);
+}
+
+void sdhci_msm_pm_qos_irq_unvote(struct sdhci_host *host, bool async)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	int counter;
+
+	if (!msm_host->pm_qos_irq.enabled)
+		return;
+
+	if (atomic_read(&msm_host->pm_qos_irq.counter)) {
+		counter = atomic_dec_return(&msm_host->pm_qos_irq.counter);
+	} else {
+		WARN(1, "attempt to decrement pm_qos_irq.counter when it's 0");
+		return;
+	}
+
+	if (counter)
+		return;
+
+	if (async) {
+		schedule_delayed_work(&msm_host->pm_qos_irq.unvote_work,
+				      msecs_to_jiffies(QOS_REMOVE_DELAY_MS));
+		return;
+	}
+
+	msm_host->pm_qos_irq.latency = PM_QOS_DEFAULT_VALUE;
+	pm_qos_update_request(&msm_host->pm_qos_irq.req,
+			msm_host->pm_qos_irq.latency);
+}
+
+static ssize_t
+sdhci_msm_pm_qos_irq_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct sdhci_host *host = dev_get_drvdata(dev);
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	struct sdhci_msm_pm_qos_irq *irq = &msm_host->pm_qos_irq;
+
+	return snprintf(buf, PAGE_SIZE,
+		"IRQ PM QoS: enabled=%d, counter=%d, latency=%d\n",
+		irq->enabled, atomic_read(&irq->counter), irq->latency);
+}
+
+static ssize_t
+sdhci_msm_pm_qos_irq_enable_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct sdhci_host *host = dev_get_drvdata(dev);
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+
+	return snprintf(buf, PAGE_SIZE, "%u\n", msm_host->pm_qos_irq.enabled);
+}
+
+static ssize_t
+sdhci_msm_pm_qos_irq_enable_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct sdhci_host *host = dev_get_drvdata(dev);
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	uint32_t value;
+	bool enable;
+	int ret;
+
+	ret = kstrtou32(buf, 0, &value);
+	if (ret)
+		goto out;
+	enable = !!value;
+
+	if (enable == msm_host->pm_qos_irq.enabled)
+		goto out;
+
+	msm_host->pm_qos_irq.enabled = enable;
+	if (!enable) {
+		cancel_delayed_work_sync(&msm_host->pm_qos_irq.unvote_work);
+		atomic_set(&msm_host->pm_qos_irq.counter, 0);
+		msm_host->pm_qos_irq.latency = PM_QOS_DEFAULT_VALUE;
+		pm_qos_update_request(&msm_host->pm_qos_irq.req,
+				msm_host->pm_qos_irq.latency);
+	}
+
+out:
+	return count;
+}
+
+#ifdef CONFIG_SMP
+static inline void set_affine_irq(struct sdhci_msm_host *msm_host,
+				struct sdhci_host *host)
+{
+	msm_host->pm_qos_irq.req.irq = host->irq;
+}
+#else
+static inline void set_affine_irq(struct sdhci_msm_host *msm_host,
+				struct sdhci_host *host) { }
+#endif
+
+void sdhci_msm_pm_qos_irq_init(struct sdhci_host *host)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	struct sdhci_msm_pm_qos_latency *irq_latency;
+	int ret;
+
+	if (!msm_host->pdata->pm_qos_data.irq_valid)
+		return;
+
+	/* Initialize only once as this gets called per partition */
+	if (msm_host->pm_qos_irq.enabled)
+		return;
+
+	atomic_set(&msm_host->pm_qos_irq.counter, 0);
+	msm_host->pm_qos_irq.req.type =
+			msm_host->pdata->pm_qos_data.irq_req_type;
+	if ((msm_host->pm_qos_irq.req.type != PM_QOS_REQ_AFFINE_CORES) &&
+		(msm_host->pm_qos_irq.req.type != PM_QOS_REQ_ALL_CORES))
+		set_affine_irq(msm_host, host);
+	else
+		cpumask_copy(&msm_host->pm_qos_irq.req.cpus_affine,
+			cpumask_of(msm_host->pdata->pm_qos_data.irq_cpu));
+
+	INIT_DELAYED_WORK(&msm_host->pm_qos_irq.unvote_work,
+		sdhci_msm_pm_qos_irq_unvote_work);
+	/* For initialization phase, set the performance latency */
+	irq_latency = &msm_host->pdata->pm_qos_data.irq_latency;
+	msm_host->pm_qos_irq.latency =
+		irq_latency->latency[SDHCI_PERFORMANCE_MODE];
+	pm_qos_add_request(&msm_host->pm_qos_irq.req, PM_QOS_CPU_DMA_LATENCY,
+			msm_host->pm_qos_irq.latency);
+	msm_host->pm_qos_irq.enabled = true;
+
+	/* sysfs */
+	msm_host->pm_qos_irq.enable_attr.show =
+		sdhci_msm_pm_qos_irq_enable_show;
+	msm_host->pm_qos_irq.enable_attr.store =
+		sdhci_msm_pm_qos_irq_enable_store;
+	sysfs_attr_init(&msm_host->pm_qos_irq.enable_attr.attr);
+	msm_host->pm_qos_irq.enable_attr.attr.name = "pm_qos_irq_enable";
+	msm_host->pm_qos_irq.enable_attr.attr.mode = S_IRUGO | S_IWUSR;
+	ret = device_create_file(&msm_host->pdev->dev,
+		&msm_host->pm_qos_irq.enable_attr);
+	if (ret)
+		pr_err("%s: fail to create pm_qos_irq_enable (%d)\n",
+			__func__, ret);
+
+	msm_host->pm_qos_irq.status_attr.show = sdhci_msm_pm_qos_irq_show;
+	msm_host->pm_qos_irq.status_attr.store = NULL;
+	sysfs_attr_init(&msm_host->pm_qos_irq.status_attr.attr);
+	msm_host->pm_qos_irq.status_attr.attr.name = "pm_qos_irq_status";
+	msm_host->pm_qos_irq.status_attr.attr.mode = S_IRUGO;
+	ret = device_create_file(&msm_host->pdev->dev,
+			&msm_host->pm_qos_irq.status_attr);
+	if (ret)
+		pr_err("%s: fail to create pm_qos_irq_status (%d)\n",
+			__func__, ret);
+}
+
+static ssize_t sdhci_msm_pm_qos_group_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct sdhci_host *host = dev_get_drvdata(dev);
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	struct sdhci_msm_pm_qos_group *group;
+	int i;
+	int nr_groups = msm_host->pdata->pm_qos_data.cpu_group_map.nr_groups;
+	int offset = 0;
+
+	for (i = 0; i < nr_groups; i++) {
+		group = &msm_host->pm_qos[i];
+		offset += snprintf(&buf[offset], PAGE_SIZE,
+			"Group #%d (mask=0x%lx) PM QoS: enabled=%d, counter=%d, latency=%d\n",
+			i, group->req.cpus_affine.bits[0],
+			msm_host->pm_qos_group_enable,
+			atomic_read(&group->counter),
+			group->latency);
+	}
+
+	return offset;
+}
+
+static ssize_t sdhci_msm_pm_qos_group_enable_show(struct device *dev,
+		struct device_attribute *attr, char *buf)
+{
+	struct sdhci_host *host = dev_get_drvdata(dev);
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+
+	return snprintf(buf, PAGE_SIZE, "%s\n",
+		msm_host->pm_qos_group_enable ? "enabled" : "disabled");
+}
+
+static ssize_t sdhci_msm_pm_qos_group_enable_store(struct device *dev,
+		struct device_attribute *attr, const char *buf, size_t count)
+{
+	struct sdhci_host *host = dev_get_drvdata(dev);
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	int nr_groups = msm_host->pdata->pm_qos_data.cpu_group_map.nr_groups;
+	uint32_t value;
+	bool enable;
+	int ret;
+	int i;
+
+	ret = kstrtou32(buf, 0, &value);
+	if (ret)
+		goto out;
+	enable = !!value;
+
+	if (enable == msm_host->pm_qos_group_enable)
+		goto out;
+
+	msm_host->pm_qos_group_enable = enable;
+	if (!enable) {
+		for (i = 0; i < nr_groups; i++) {
+			cancel_delayed_work_sync(
+				&msm_host->pm_qos[i].unvote_work);
+			atomic_set(&msm_host->pm_qos[i].counter, 0);
+			msm_host->pm_qos[i].latency = PM_QOS_DEFAULT_VALUE;
+			pm_qos_update_request(&msm_host->pm_qos[i].req,
+				msm_host->pm_qos[i].latency);
+		}
+	}
+
+out:
+	return count;
+}
+
+static int sdhci_msm_get_cpu_group(struct sdhci_msm_host *msm_host, int cpu)
+{
+	int i;
+	struct sdhci_msm_cpu_group_map *map =
+			&msm_host->pdata->pm_qos_data.cpu_group_map;
+
+	if (cpu < 0)
+		goto not_found;
+
+	for (i = 0; i < map->nr_groups; i++)
+		if (cpumask_test_cpu(cpu, &map->mask[i]))
+			return i;
+
+not_found:
+	return -EINVAL;
+}
+
+void sdhci_msm_pm_qos_cpu_vote(struct sdhci_host *host,
+		struct sdhci_msm_pm_qos_latency *latency, int cpu)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	int group = sdhci_msm_get_cpu_group(msm_host, cpu);
+	struct sdhci_msm_pm_qos_group *pm_qos_group;
+	int counter;
+
+	if (!msm_host->pm_qos_group_enable || group < 0)
+		return;
+
+	pm_qos_group = &msm_host->pm_qos[group];
+	counter = atomic_inc_return(&pm_qos_group->counter);
+
+	/* Make sure to update the voting in case power policy has changed */
+	if (pm_qos_group->latency == latency->latency[host->power_policy]
+		&& counter > 1)
+		return;
+
+	cancel_delayed_work_sync(&pm_qos_group->unvote_work);
+
+	pm_qos_group->latency = latency->latency[host->power_policy];
+	pm_qos_update_request(&pm_qos_group->req, pm_qos_group->latency);
+}
+
+static void sdhci_msm_pm_qos_cpu_unvote_work(struct work_struct *work)
+{
+	struct sdhci_msm_pm_qos_group *group =
+		container_of(work, struct sdhci_msm_pm_qos_group,
+			     unvote_work.work);
+
+	if (atomic_read(&group->counter))
+		return;
+
+	group->latency = PM_QOS_DEFAULT_VALUE;
+	pm_qos_update_request(&group->req, group->latency);
+}
+
+bool sdhci_msm_pm_qos_cpu_unvote(struct sdhci_host *host, int cpu, bool async)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	int group = sdhci_msm_get_cpu_group(msm_host, cpu);
+
+	if (!msm_host->pm_qos_group_enable || group < 0 ||
+		atomic_dec_return(&msm_host->pm_qos[group].counter))
+		return false;
+
+	if (async) {
+		schedule_delayed_work(&msm_host->pm_qos[group].unvote_work,
+				      msecs_to_jiffies(QOS_REMOVE_DELAY_MS));
+		return true;
+	}
+
+	msm_host->pm_qos[group].latency = PM_QOS_DEFAULT_VALUE;
+	pm_qos_update_request(&msm_host->pm_qos[group].req,
+				msm_host->pm_qos[group].latency);
+	return true;
+}
+
+void sdhci_msm_pm_qos_cpu_init(struct sdhci_host *host,
+		struct sdhci_msm_pm_qos_latency *latency)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	int nr_groups = msm_host->pdata->pm_qos_data.cpu_group_map.nr_groups;
+	struct sdhci_msm_pm_qos_group *group;
+	int i;
+	int ret;
+
+	if (msm_host->pm_qos_group_enable)
+		return;
+
+	msm_host->pm_qos = kcalloc(nr_groups, sizeof(*msm_host->pm_qos),
+			GFP_KERNEL);
+	if (!msm_host->pm_qos)
+		return;
+
+	for (i = 0; i < nr_groups; i++) {
+		group = &msm_host->pm_qos[i];
+		INIT_DELAYED_WORK(&group->unvote_work,
+			sdhci_msm_pm_qos_cpu_unvote_work);
+		atomic_set(&group->counter, 0);
+		group->req.type = PM_QOS_REQ_AFFINE_CORES;
+		cpumask_copy(&group->req.cpus_affine,
+			&msm_host->pdata->pm_qos_data.cpu_group_map.mask[i]);
+		/* For initialization phase, set the performance mode latency */
+		group->latency = latency[i].latency[SDHCI_PERFORMANCE_MODE];
+		pm_qos_add_request(&group->req, PM_QOS_CPU_DMA_LATENCY,
+			group->latency);
+		pr_info("%s (): voted for group #%d (mask=0x%lx) latency=%d (0x%p)\n",
+			__func__, i,
+			group->req.cpus_affine.bits[0],
+			group->latency,
+			&latency[i].latency[SDHCI_PERFORMANCE_MODE]);
+	}
+	msm_host->pm_qos_prev_cpu = -1;
+	msm_host->pm_qos_group_enable = true;
+
+	/* sysfs */
+	msm_host->pm_qos_group_status_attr.show = sdhci_msm_pm_qos_group_show;
+	msm_host->pm_qos_group_status_attr.store = NULL;
+	sysfs_attr_init(&msm_host->pm_qos_group_status_attr.attr);
+	msm_host->pm_qos_group_status_attr.attr.name =
+			"pm_qos_cpu_groups_status";
+	msm_host->pm_qos_group_status_attr.attr.mode = S_IRUGO;
+	ret = device_create_file(&msm_host->pdev->dev,
+			&msm_host->pm_qos_group_status_attr);
+	if (ret)
+		dev_err(&msm_host->pdev->dev, "%s: fail to create pm_qos_group_status_attr (%d)\n",
+			__func__, ret);
+	msm_host->pm_qos_group_enable_attr.show =
+			sdhci_msm_pm_qos_group_enable_show;
+	msm_host->pm_qos_group_enable_attr.store =
+			sdhci_msm_pm_qos_group_enable_store;
+	sysfs_attr_init(&msm_host->pm_qos_group_enable_attr.attr);
+	msm_host->pm_qos_group_enable_attr.attr.name =
+			"pm_qos_cpu_groups_enable";
+	msm_host->pm_qos_group_enable_attr.attr.mode = S_IRUGO;
+	ret = device_create_file(&msm_host->pdev->dev,
+			&msm_host->pm_qos_group_enable_attr);
+	if (ret)
+		dev_err(&msm_host->pdev->dev, "%s: fail to create pm_qos_group_enable_attr (%d)\n",
+			__func__, ret);
+}
+
+static void sdhci_msm_pre_req(struct sdhci_host *host,
+		struct mmc_request *mmc_req)
+{
+	int cpu;
+	int group;
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	int prev_group = sdhci_msm_get_cpu_group(msm_host,
+			msm_host->pm_qos_prev_cpu);
+
+	sdhci_msm_pm_qos_irq_vote(host);
+
+	cpu = get_cpu();
+	put_cpu();
+	group = sdhci_msm_get_cpu_group(msm_host, cpu);
+	if (group < 0)
+		return;
+
+	if (group != prev_group && prev_group >= 0) {
+		sdhci_msm_pm_qos_cpu_unvote(host,
+				msm_host->pm_qos_prev_cpu, false);
+		prev_group = -1; /* make sure to vote for new group */
+	}
+
+	if (prev_group < 0) {
+		sdhci_msm_pm_qos_cpu_vote(host,
+				msm_host->pdata->pm_qos_data.latency, cpu);
+		msm_host->pm_qos_prev_cpu = cpu;
+	}
+}
+
+static void sdhci_msm_post_req(struct sdhci_host *host,
+				struct mmc_request *mmc_req)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+
+	sdhci_msm_pm_qos_irq_unvote(host, false);
+
+	if (sdhci_msm_pm_qos_cpu_unvote(host, msm_host->pm_qos_prev_cpu, false))
+			msm_host->pm_qos_prev_cpu = -1;
+}
+
+static void sdhci_msm_init(struct sdhci_host *host)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+
+	sdhci_msm_pm_qos_irq_init(host);
+
+	if (msm_host->pdata->pm_qos_data.legacy_valid)
+		sdhci_msm_pm_qos_cpu_init(host,
+				msm_host->pdata->pm_qos_data.latency);
+}
+
+static unsigned int sdhci_msm_get_current_limit(struct sdhci_host *host)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	struct sdhci_msm_slot_reg_data *curr_slot = msm_host->pdata->vreg_data;
+	u32 max_curr = 0;
+
+	if (curr_slot && curr_slot->vdd_data)
+		max_curr = curr_slot->vdd_data->hpm_uA;
+
+	return max_curr;
 }
 
 static struct sdhci_ops sdhci_msm_ops = {
 	.set_uhs_signaling = sdhci_msm_set_uhs_signaling,
 	.check_power_status = sdhci_msm_check_power_status,
 	.platform_execute_tuning = sdhci_msm_execute_tuning,
+	.enhanced_strobe = sdhci_msm_enhanced_strobe,
 	.toggle_cdr = sdhci_msm_toggle_cdr,
 	.get_max_segments = sdhci_msm_max_segs,
 	.set_clock = sdhci_msm_set_clock,
@@ -2730,51 +3917,185 @@
 	.dump_vendor_regs = sdhci_msm_dump_vendor_regs,
 	.config_auto_tuning_cmd = sdhci_msm_config_auto_tuning_cmd,
 	.enable_controller_clock = sdhci_msm_enable_controller_clock,
+	.set_bus_width = sdhci_set_bus_width,
+	.reset = sdhci_reset,
+	.clear_set_dumpregs = sdhci_msm_clear_set_dumpregs,
+	.enhanced_strobe_mask = sdhci_msm_enhanced_strobe_mask,
+	.reset_workaround = sdhci_msm_reset_workaround,
+	.init = sdhci_msm_init,
+	.pre_req = sdhci_msm_pre_req,
+	.post_req = sdhci_msm_post_req,
+	.get_current_limit = sdhci_msm_get_current_limit,
 };
 
 static void sdhci_set_default_hw_caps(struct sdhci_msm_host *msm_host,
 		struct sdhci_host *host)
 {
-	u32 version, caps;
+	u32 version, caps = 0;
 	u16 minor;
 	u8 major;
+	u32 val;
+	const struct sdhci_msm_offset *msm_host_offset =
+					msm_host->offset;
 
-	version = readl_relaxed(msm_host->core_mem + CORE_MCI_VERSION);
+	version = sdhci_msm_readl_relaxed(host,
+		msm_host_offset->CORE_MCI_VERSION);
 	major = (version & CORE_VERSION_MAJOR_MASK) >>
 			CORE_VERSION_MAJOR_SHIFT;
 	minor = version & CORE_VERSION_TARGET_MASK;
 
+	caps = readl_relaxed(host->ioaddr + SDHCI_CAPABILITIES);
+
 	/*
 	 * Starting with SDCC 5 controller (core major version = 1)
-	 * controller won't advertise 3.0v and 8-bit features except for
-	 * some targets.
+	 * controller won't advertise 3.0v, 1.8v and 8-bit features
+	 * except for some targets.
 	 */
 	if (major >= 1 && minor != 0x11 && minor != 0x12) {
-		caps = CORE_3_0V_SUPPORT;
+		struct sdhci_msm_reg_data *vdd_io_reg;
+		/*
+		 * Enable 1.8V support capability on controllers that
+		 * support dual voltage
+		 */
+		vdd_io_reg = msm_host->pdata->vreg_data->vdd_io_data;
+		if (vdd_io_reg && (vdd_io_reg->high_vol_level > 2700000))
+			caps |= CORE_3_0V_SUPPORT;
+		if (vdd_io_reg && (vdd_io_reg->low_vol_level < 1950000))
+			caps |= CORE_1_8V_SUPPORT;
 		if (msm_host->pdata->mmc_bus_width == MMC_CAP_8_BIT_DATA)
 			caps |= CORE_8_BIT_SUPPORT;
-		writel_relaxed(
-			(readl_relaxed(host->ioaddr + SDHCI_CAPABILITIES) |
-			caps), host->ioaddr + CORE_VENDOR_SPEC_CAPABILITIES0);
 	}
 
 	/*
+	 * Enable one MID mode for SDCC5 (major 1) on 8916/8939 (minor 0x2e) and
+	 * on 8992 (minor 0x3e) as a workaround to reset for data stuck issue.
+	 */
+	if (major == 1 && (minor == 0x2e || minor == 0x3e)) {
+		host->quirks2 |= SDHCI_QUIRK2_USE_RESET_WORKAROUND;
+		val = readl_relaxed(host->ioaddr +
+			msm_host_offset->CORE_VENDOR_SPEC_FUNC2);
+		writel_relaxed((val | CORE_ONE_MID_EN),
+			host->ioaddr + msm_host_offset->CORE_VENDOR_SPEC_FUNC2);
+	}
+	/*
 	 * SDCC 5 controller with major version 1, minor version 0x34 and later
 	 * with HS 400 mode support will use CM DLL instead of CDC LP 533 DLL.
 	 */
 	if ((major == 1) && (minor < 0x34))
 		msm_host->use_cdclp533 = true;
+
+	/*
+	 * SDCC 5 controller with major version 1, minor version 0x42 and later
+	 * will require additional steps when resetting DLL.
+	 * It also supports HS400 enhanced strobe mode.
+	 */
+	if ((major == 1) && (minor >= 0x42)) {
+		msm_host->use_updated_dll_reset = true;
+		msm_host->enhanced_strobe = true;
+	}
+
+	/*
+	 * SDCC 5 controller with major version 1 and minor version 0x42,
+	 * 0x46 and 0x49 currently uses 14lpp tech DLL whose internal
+	 * gating cannot guarantee MCLK timing requirement i.e.
+	 * when MCLK is gated OFF, it is not gated for less than 0.5us
+	 * and MCLK must be switched on for at-least 1us before DATA
+	 * starts coming.
+	 */
+	if ((major == 1) && ((minor == 0x42) || (minor == 0x46) ||
+				(minor == 0x49)))
+		msm_host->use_14lpp_dll = true;
+
+	/* Fake 3.0V support for SDIO devices which requires such voltage */
+	if (msm_host->pdata->core_3_0v_support) {
+		caps |= CORE_3_0V_SUPPORT;
+			writel_relaxed((readl_relaxed(host->ioaddr +
+			SDHCI_CAPABILITIES) | caps), host->ioaddr +
+			msm_host_offset->CORE_VENDOR_SPEC_CAPABILITIES0);
+	}
+
+	if ((major == 1) && (minor >= 0x49))
+		msm_host->rclk_delay_fix = true;
+	/*
+	 * Mask 64-bit support for controller with 32-bit address bus so that
+	 * smaller descriptor size will be used and improve memory consumption.
+	 */
+	if (!msm_host->pdata->largeaddressbus)
+		caps &= ~CORE_SYS_BUS_SUPPORT_64_BIT;
+
+	writel_relaxed(caps, host->ioaddr +
+		msm_host_offset->CORE_VENDOR_SPEC_CAPABILITIES0);
+	/* keep track of the value in SDHCI_CAPABILITIES */
+	msm_host->caps_0 = caps;
+
+	if ((major == 1) && (minor >= 0x6b))
+		msm_host->ice_hci_support = true;
+}
+
+#ifdef CONFIG_MMC_CQ_HCI
+static void sdhci_msm_cmdq_init(struct sdhci_host *host,
+				struct platform_device *pdev)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+
+	if (nocmdq) {
+		dev_dbg(&pdev->dev, "CMDQ disabled via cmdline\n");
+		return;
+	}
+
+	host->cq_host = cmdq_pltfm_init(pdev);
+	if (IS_ERR(host->cq_host)) {
+		dev_dbg(&pdev->dev, "cmdq-pltfm init: failed: %ld\n",
+			PTR_ERR(host->cq_host));
+		host->cq_host = NULL;
+	} else {
+		msm_host->mmc->caps2 |= MMC_CAP2_CMD_QUEUE;
+	}
+}
+#else
+static void sdhci_msm_cmdq_init(struct sdhci_host *host,
+				struct platform_device *pdev)
+{
+
+}
+#endif
+
+static bool sdhci_msm_is_bootdevice(struct device *dev)
+{
+	if (strnstr(saved_command_line, "androidboot.bootdevice=",
+		    strlen(saved_command_line))) {
+		char search_string[50];
+
+		snprintf(search_string, ARRAY_SIZE(search_string),
+			"androidboot.bootdevice=%s", dev_name(dev));
+		if (strnstr(saved_command_line, search_string,
+		    strlen(saved_command_line)))
+			return true;
+		else
+			return false;
+	}
+
+	/*
+	 * "androidboot.bootdevice=" argument is not present then
+	 * return true as we don't know the boot device anyways.
+	 */
+	return true;
 }
 
 static int sdhci_msm_probe(struct platform_device *pdev)
 {
+	const struct sdhci_msm_offset *msm_host_offset;
 	struct sdhci_host *host;
 	struct sdhci_pltfm_host *pltfm_host;
 	struct sdhci_msm_host *msm_host;
 	struct resource *core_memres = NULL;
-	int ret = 0, pwr_irq = 0, dead = 0;
+	int ret = 0, dead = 0;
 	u16 host_version;
-	u32 pwr, irq_status, irq_ctl;
+	u32 irq_status, irq_ctl;
+	struct resource *tlmm_memres = NULL;
+	void __iomem *tlmm_mem;
+	unsigned long flags;
 
 	pr_debug("%s: Enter %s\n", dev_name(&pdev->dev), __func__);
 	msm_host = devm_kzalloc(&pdev->dev, sizeof(struct sdhci_msm_host),
@@ -2784,6 +4105,14 @@
 		goto out;
 	}
 
+	if (of_find_compatible_node(NULL, NULL, "qcom,sdhci-msm-v5")) {
+		msm_host->mci_removed = true;
+		msm_host->offset = &sdhci_msm_offset_mci_removed;
+	} else {
+		msm_host->mci_removed = false;
+		msm_host->offset = &sdhci_msm_offset_mci_present;
+	}
+	msm_host_offset = msm_host->offset;
 	msm_host->sdhci_msm_pdata.ops = &sdhci_msm_ops;
 	host = sdhci_pltfm_init(pdev, &msm_host->sdhci_msm_pdata, 0);
 	if (IS_ERR(host)) {
@@ -2799,11 +4128,18 @@
 	/* Extract platform data */
 	if (pdev->dev.of_node) {
 		ret = of_alias_get_id(pdev->dev.of_node, "sdhc");
-		if (ret < 0) {
+		if (ret <= 0) {
 			dev_err(&pdev->dev, "Failed to get slot index %d\n",
 				ret);
 			goto pltfm_free;
 		}
+
+		/* skip the probe if eMMC isn't a boot device */
+		if ((ret == 1) && !sdhci_msm_is_bootdevice(&pdev->dev)) {
+			ret = -ENODEV;
+			goto pltfm_free;
+		}
+
 		if (disable_slots & (1 << (ret - 1))) {
 			dev_info(&pdev->dev, "%s: Slot %d disabled\n", __func__,
 				ret);
@@ -2811,7 +4147,11 @@
 			goto pltfm_free;
 		}
 
-		msm_host->pdata = sdhci_msm_populate_pdata(&pdev->dev);
+		if (ret <= 2)
+			sdhci_slot[ret-1] = msm_host;
+
+		msm_host->pdata = sdhci_msm_populate_pdata(&pdev->dev,
+							   msm_host);
 		if (!msm_host->pdata) {
 			dev_err(&pdev->dev, "DT parsing error\n");
 			goto pltfm_free;
@@ -2901,46 +4241,62 @@
 	/* Reset the core and Enable SDHC mode */
 	core_memres = platform_get_resource_byname(pdev,
 				IORESOURCE_MEM, "core_mem");
-	if (!core_memres) {
-		dev_err(&pdev->dev, "Failed to get iomem resource\n");
-		goto vreg_deinit;
-	}
-	msm_host->core_mem = devm_ioremap(&pdev->dev, core_memres->start,
-					resource_size(core_memres));
+	if (!msm_host->mci_removed) {
+		if (!core_memres) {
+			dev_err(&pdev->dev, "Failed to get iomem resource\n");
+			goto vreg_deinit;
+		}
+		msm_host->core_mem = devm_ioremap(&pdev->dev,
+			core_memres->start, resource_size(core_memres));
 
-	if (!msm_host->core_mem) {
-		dev_err(&pdev->dev, "Failed to remap registers\n");
-		ret = -ENOMEM;
-		goto vreg_deinit;
+		if (!msm_host->core_mem) {
+			dev_err(&pdev->dev, "Failed to remap registers\n");
+			ret = -ENOMEM;
+			goto vreg_deinit;
+		}
 	}
 
-	/* Unset HC_MODE_EN bit in HC_MODE register */
-	writel_relaxed(0, (msm_host->core_mem + CORE_HC_MODE));
+	tlmm_memres = platform_get_resource_byname(pdev,
+				IORESOURCE_MEM, "tlmm_mem");
+	if (tlmm_memres) {
+		tlmm_mem = devm_ioremap(&pdev->dev, tlmm_memres->start,
+						resource_size(tlmm_memres));
 
-	/* Set SW_RST bit in POWER register (Offset 0x0) */
-	writel_relaxed(readl_relaxed(msm_host->core_mem + CORE_POWER) |
-			CORE_SW_RST, msm_host->core_mem + CORE_POWER);
+		if (!tlmm_mem) {
+			dev_err(&pdev->dev, "Failed to remap tlmm registers\n");
+			ret = -ENOMEM;
+			goto vreg_deinit;
+		}
+		writel_relaxed(readl_relaxed(tlmm_mem) | 0x2, tlmm_mem);
+		dev_dbg(&pdev->dev, "tlmm reg %pa value 0x%08x\n",
+				&tlmm_memres->start, readl_relaxed(tlmm_mem));
+	}
+
 	/*
-	 * SW reset can take upto 10HCLK + 15MCLK cycles.
-	 * Calculating based on min clk rates (hclk = 27MHz,
-	 * mclk = 400KHz) it comes to ~40us. Let's poll for
-	 * max. 1ms for reset completion.
+	 * Reset the vendor spec register to power on reset state.
 	 */
-	ret = readl_poll_timeout(msm_host->core_mem + CORE_POWER,
-			pwr, !(pwr & CORE_SW_RST), 10, 1000);
+	writel_relaxed(CORE_VENDOR_SPEC_POR_VAL,
+	host->ioaddr + msm_host_offset->CORE_VENDOR_SPEC);
 
-	if (ret) {
-		dev_err(&pdev->dev, "reset failed (%d)\n", ret);
-		goto vreg_deinit;
+	if (!msm_host->mci_removed) {
+		/* Set HC_MODE_EN bit in HC_MODE register */
+		writel_relaxed(HC_MODE_EN, (msm_host->core_mem + CORE_HC_MODE));
+
+		/* Set FF_CLK_SW_RST_DIS bit in HC_MODE register */
+		writel_relaxed(readl_relaxed(msm_host->core_mem +
+				CORE_HC_MODE) | FF_CLK_SW_RST_DIS,
+				msm_host->core_mem + CORE_HC_MODE);
 	}
-	/* Set HC_MODE_EN bit in HC_MODE register */
-	writel_relaxed(HC_MODE_EN, (msm_host->core_mem + CORE_HC_MODE));
-
-	/* Set FF_CLK_SW_RST_DIS bit in HC_MODE register */
-	writel_relaxed(readl_relaxed(msm_host->core_mem + CORE_HC_MODE) |
-			FF_CLK_SW_RST_DIS, msm_host->core_mem + CORE_HC_MODE);
-
 	sdhci_set_default_hw_caps(msm_host, host);
+
+	/*
+	 * Set the PAD_PWR_SWTICH_EN bit so that the PAD_PWR_SWITCH bit can
+	 * be used as required later on.
+	 */
+	writel_relaxed((readl_relaxed(host->ioaddr +
+			msm_host_offset->CORE_VENDOR_SPEC) |
+			CORE_IO_PAD_PWR_SWITCH_EN), host->ioaddr +
+			msm_host_offset->CORE_VENDOR_SPEC);
 	/*
 	 * CORE_SW_RST above may trigger power irq if previous status of PWRCTL
 	 * was either BUS_ON or IO_HIGH_V. So before we enable the power irq
@@ -2948,14 +4304,20 @@
 	 * ensure that any pending power irq interrupt status is acknowledged
 	 * otherwise power irq interrupt handler would be fired prematurely.
 	 */
-	irq_status = readl_relaxed(msm_host->core_mem + CORE_PWRCTL_STATUS);
-	writel_relaxed(irq_status, (msm_host->core_mem + CORE_PWRCTL_CLEAR));
-	irq_ctl = readl_relaxed(msm_host->core_mem + CORE_PWRCTL_CTL);
+	irq_status = sdhci_msm_readl_relaxed(host,
+		msm_host_offset->CORE_PWRCTL_STATUS);
+	sdhci_msm_writel_relaxed(irq_status, host,
+		msm_host_offset->CORE_PWRCTL_CLEAR);
+	irq_ctl = sdhci_msm_readl_relaxed(host,
+		msm_host_offset->CORE_PWRCTL_CTL);
+
 	if (irq_status & (CORE_PWRCTL_BUS_ON | CORE_PWRCTL_BUS_OFF))
 		irq_ctl |= CORE_PWRCTL_BUS_SUCCESS;
 	if (irq_status & (CORE_PWRCTL_IO_HIGH | CORE_PWRCTL_IO_LOW))
 		irq_ctl |= CORE_PWRCTL_IO_SUCCESS;
-	writel_relaxed(irq_ctl, (msm_host->core_mem + CORE_PWRCTL_CTL));
+	sdhci_msm_writel_relaxed(irq_ctl, host,
+		msm_host_offset->CORE_PWRCTL_CTL);
+
 	/*
 	 * Ensure that above writes are propogated before interrupt enablement
 	 * in GIC.
@@ -2970,10 +4332,13 @@
 	host->quirks |= SDHCI_QUIRK_BROKEN_CARD_DETECTION;
 	host->quirks |= SDHCI_QUIRK_SINGLE_POWER_WRITE;
 	host->quirks |= SDHCI_QUIRK_CAP_CLOCK_BASE_BROKEN;
+	host->quirks |= SDHCI_QUIRK_NO_ENDATTR_IN_NOPDESC;
 	host->quirks2 |= SDHCI_QUIRK2_ALWAYS_USE_BASE_CLOCK;
 	host->quirks2 |= SDHCI_QUIRK2_IGNORE_DATATOUT_FOR_R1BCMD;
 	host->quirks2 |= SDHCI_QUIRK2_BROKEN_PRESET_VALUE;
 	host->quirks2 |= SDHCI_QUIRK2_USE_RESERVED_MAX_TIMEOUT;
+	host->quirks2 |= SDHCI_QUIRK2_NON_STANDARD_TUNING;
+	host->quirks2 |= SDHCI_QUIRK2_USE_PIO_FOR_EMMC_TUNING;
 
 	if (host->quirks2 & SDHCI_QUIRK2_ALWAYS_USE_BASE_CLOCK)
 		host->quirks2 |= SDHCI_QUIRK2_DIVIDE_TOUT_BY_4;
@@ -2999,23 +4364,24 @@
 	host->quirks2 |= SDHCI_QUIRK2_IGN_DATA_END_BIT_ERROR;
 
 	/* Setup PWRCTL irq */
-	pwr_irq = platform_get_irq_byname(pdev, "pwr_irq");
-	if (pwr_irq < 0) {
+	msm_host->pwr_irq = platform_get_irq_byname(pdev, "pwr_irq");
+	if (msm_host->pwr_irq < 0) {
 		dev_err(&pdev->dev, "Failed to get pwr_irq by name (%d)\n",
-				pwr_irq);
+				msm_host->pwr_irq);
 		goto vreg_deinit;
 	}
-	ret = devm_request_threaded_irq(&pdev->dev, pwr_irq, NULL,
+	ret = devm_request_threaded_irq(&pdev->dev, msm_host->pwr_irq, NULL,
 					sdhci_msm_pwr_irq, IRQF_ONESHOT,
 					dev_name(&pdev->dev), host);
 	if (ret) {
 		dev_err(&pdev->dev, "Request threaded irq(%d) failed (%d)\n",
-				pwr_irq, ret);
+				msm_host->pwr_irq, ret);
 		goto vreg_deinit;
 	}
 
 	/* Enable pwr irq interrupts */
-	writel_relaxed(INT_MASK, (msm_host->core_mem + CORE_PWRCTL_MASK));
+	sdhci_msm_writel_relaxed(INT_MASK, host,
+		msm_host_offset->CORE_PWRCTL_MASK);
 
 #ifdef CONFIG_MMC_CLKGATE
 	/* Set clock gating delay to be used when CONFIG_MMC_CLKGATE is set */
@@ -3025,17 +4391,22 @@
 	/* Set host capabilities */
 	msm_host->mmc->caps |= msm_host->pdata->mmc_bus_width;
 	msm_host->mmc->caps |= msm_host->pdata->caps;
+	msm_host->mmc->caps |= MMC_CAP_AGGRESSIVE_PM;
+	msm_host->mmc->caps |= MMC_CAP_WAIT_WHILE_BUSY;
 	msm_host->mmc->caps2 |= msm_host->pdata->caps2;
-	msm_host->mmc->caps2 |= MMC_CAP2_PACKED_WR;
-	msm_host->mmc->caps2 |= MMC_CAP2_PACKED_WR_CONTROL;
+	msm_host->mmc->caps2 |= MMC_CAP2_BOOTPART_NOACC;
+	msm_host->mmc->caps2 |= MMC_CAP2_HS400_POST_TUNING;
 	msm_host->mmc->caps2 |= MMC_CAP2_CLK_SCALE;
-	msm_host->mmc->caps2 |= MMC_CAP2_ASYNC_SDIO_IRQ_4BIT_MODE;
-	msm_host->mmc->pm_caps |= MMC_PM_KEEP_POWER;
+	msm_host->mmc->caps2 |= MMC_CAP2_SANITIZE;
+	msm_host->mmc->caps2 |= MMC_CAP2_MAX_DISCARD_SIZE;
+	msm_host->mmc->caps2 |= MMC_CAP2_SLEEP_AWAKE;
+	msm_host->mmc->pm_caps |= MMC_PM_KEEP_POWER | MMC_PM_WAKE_SDIO_IRQ;
 
 	if (msm_host->pdata->nonremovable)
 		msm_host->mmc->caps |= MMC_CAP_NONREMOVABLE;
 
-	host->cpu_dma_latency_us = msm_host->pdata->cpu_dma_latency_us;
+	if (msm_host->pdata->nonhotplug)
+		msm_host->mmc->caps2 |= MMC_CAP2_NONHOTPLUG;
 
 	init_completion(&msm_host->pwr_irq_completion);
 
@@ -3047,6 +4418,11 @@
 		 */
 		sdhci_msm_setup_pins(msm_host->pdata, true);
 
+		/*
+		 * This delay is needed for stabilizing the card detect GPIO
+		 * line after changing the pull configs.
+		 */
+		usleep_range(10000, 10500);
 		ret = mmc_gpio_request_cd(msm_host->mmc,
 				msm_host->pdata->status_gpio, 0);
 		if (ret) {
@@ -3060,19 +4436,51 @@
 		(dma_supported(mmc_dev(host->mmc), DMA_BIT_MASK(64)))) {
 		host->dma_mask = DMA_BIT_MASK(64);
 		mmc_dev(host->mmc)->dma_mask = &host->dma_mask;
+		mmc_dev(host->mmc)->coherent_dma_mask  = host->dma_mask;
 	} else if (dma_supported(mmc_dev(host->mmc), DMA_BIT_MASK(32))) {
 		host->dma_mask = DMA_BIT_MASK(32);
 		mmc_dev(host->mmc)->dma_mask = &host->dma_mask;
+		mmc_dev(host->mmc)->coherent_dma_mask  = host->dma_mask;
 	} else {
 		dev_err(&pdev->dev, "%s: Failed to set dma mask\n", __func__);
 	}
 
+	msm_host->pdata->sdiowakeup_irq = platform_get_irq_byname(pdev,
+							  "sdiowakeup_irq");
+	if (sdhci_is_valid_gpio_wakeup_int(msm_host)) {
+		dev_info(&pdev->dev, "%s: sdiowakeup_irq = %d\n", __func__,
+				msm_host->pdata->sdiowakeup_irq);
+		msm_host->is_sdiowakeup_enabled = true;
+		ret = request_irq(msm_host->pdata->sdiowakeup_irq,
+				  sdhci_msm_sdiowakeup_irq,
+				  IRQF_SHARED | IRQF_TRIGGER_HIGH,
+				  "sdhci-msm sdiowakeup", host);
+		if (ret) {
+			dev_err(&pdev->dev, "%s: request sdiowakeup IRQ %d: failed: %d\n",
+				__func__, msm_host->pdata->sdiowakeup_irq, ret);
+			msm_host->pdata->sdiowakeup_irq = -1;
+			msm_host->is_sdiowakeup_enabled = false;
+			goto vreg_deinit;
+		} else {
+			spin_lock_irqsave(&host->lock, flags);
+			sdhci_msm_cfg_sdiowakeup_gpio_irq(host, false);
+			msm_host->sdio_pending_processing = false;
+			spin_unlock_irqrestore(&host->lock, flags);
+		}
+	}
+
+	sdhci_msm_cmdq_init(host, pdev);
 	ret = sdhci_add_host(host);
 	if (ret) {
 		dev_err(&pdev->dev, "Add host failed (%d)\n", ret);
 		goto vreg_deinit;
 	}
 
+	pm_runtime_set_active(&pdev->dev);
+	pm_runtime_enable(&pdev->dev);
+	pm_runtime_set_autosuspend_delay(&pdev->dev, MSM_AUTOSUSPEND_DELAY_MS);
+	pm_runtime_use_autosuspend(&pdev->dev);
+
 	msm_host->msm_bus_vote.max_bus_bw.show = show_sdhci_max_bus_bw;
 	msm_host->msm_bus_vote.max_bus_bw.store = store_sdhci_max_bus_bw;
 	sysfs_attr_init(&msm_host->msm_bus_vote.max_bus_bw.attr);
@@ -3105,7 +4513,6 @@
 		       mmc_hostname(host->mmc), __func__, ret);
 		device_remove_file(&pdev->dev, &msm_host->auto_cmd21_attr);
 	}
-
 	/* Successful initialization */
 	goto out;
 
@@ -3113,6 +4520,7 @@
 	device_remove_file(&pdev->dev, &msm_host->msm_bus_vote.max_bus_bw);
 remove_host:
 	dead = (readl_relaxed(host->ioaddr + SDHCI_INT_STATUS) == 0xffffffff);
+	pm_runtime_disable(&pdev->dev);
 	sdhci_remove_host(host, dead);
 vreg_deinit:
 	sdhci_msm_vreg_init(&pdev->dev, msm_host->pdata, false);
@@ -3155,6 +4563,7 @@
 	if (!gpio_is_valid(msm_host->pdata->status_gpio))
 		device_remove_file(&pdev->dev, &msm_host->polling);
 	device_remove_file(&pdev->dev, &msm_host->msm_bus_vote.max_bus_bw);
+	pm_runtime_disable(&pdev->dev);
 	sdhci_remove_host(host, dead);
 	sdhci_pltfm_free(pdev);
 
@@ -3170,8 +4579,209 @@
 	return 0;
 }
 
+#ifdef CONFIG_PM
+static int sdhci_msm_cfg_sdio_wakeup(struct sdhci_host *host, bool enable)
+{
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	unsigned long flags;
+	int ret = 0;
+
+	if (!(host->mmc->card && mmc_card_sdio(host->mmc->card) &&
+	      sdhci_is_valid_gpio_wakeup_int(msm_host) &&
+	      mmc_card_wake_sdio_irq(host->mmc))) {
+		msm_host->sdio_pending_processing = false;
+		return 1;
+	}
+
+	spin_lock_irqsave(&host->lock, flags);
+	if (enable) {
+		/* configure DAT1 gpio if applicable */
+		if (sdhci_is_valid_gpio_wakeup_int(msm_host)) {
+			msm_host->sdio_pending_processing = false;
+			ret = enable_irq_wake(msm_host->pdata->sdiowakeup_irq);
+			if (!ret)
+				sdhci_msm_cfg_sdiowakeup_gpio_irq(host, true);
+			goto out;
+		} else {
+			pr_err("%s: sdiowakeup_irq(%d) invalid\n",
+					mmc_hostname(host->mmc), enable);
+		}
+	} else {
+		if (sdhci_is_valid_gpio_wakeup_int(msm_host)) {
+			ret = disable_irq_wake(msm_host->pdata->sdiowakeup_irq);
+			sdhci_msm_cfg_sdiowakeup_gpio_irq(host, false);
+			msm_host->sdio_pending_processing = false;
+		} else {
+			pr_err("%s: sdiowakeup_irq(%d)invalid\n",
+					mmc_hostname(host->mmc), enable);
+
+		}
+	}
+out:
+	if (ret)
+		pr_err("%s: %s: %sable wakeup: failed: %d gpio: %d\n",
+		       mmc_hostname(host->mmc), __func__, enable ? "en" : "dis",
+		       ret, msm_host->pdata->sdiowakeup_irq);
+	spin_unlock_irqrestore(&host->lock, flags);
+	return ret;
+}
+
+
+static int sdhci_msm_runtime_suspend(struct device *dev)
+{
+	struct sdhci_host *host = dev_get_drvdata(dev);
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	ktime_t start = ktime_get();
+
+	if (host->mmc->card && mmc_card_sdio(host->mmc->card))
+		goto defer_disable_host_irq;
+
+	sdhci_cfg_irq(host, false, true);
+
+defer_disable_host_irq:
+	disable_irq(msm_host->pwr_irq);
+
+	/*
+	 * Remove the vote immediately only if clocks are off in which
+	 * case we might have queued work to remove vote but it may not
+	 * be completed before runtime suspend or system suspend.
+	 */
+	if (!atomic_read(&msm_host->clks_on)) {
+		if (msm_host->msm_bus_vote.client_handle)
+			sdhci_msm_bus_cancel_work_and_set_vote(host, 0);
+	}
+	trace_sdhci_msm_runtime_suspend(mmc_hostname(host->mmc), 0,
+			ktime_to_us(ktime_sub(ktime_get(), start)));
+
+	return 0;
+}
+
+static int sdhci_msm_runtime_resume(struct device *dev)
+{
+	struct sdhci_host *host = dev_get_drvdata(dev);
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	ktime_t start = ktime_get();
+
+	if (host->mmc->card && mmc_card_sdio(host->mmc->card))
+		goto defer_enable_host_irq;
+
+	sdhci_cfg_irq(host, true, true);
+
+defer_enable_host_irq:
+	enable_irq(msm_host->pwr_irq);
+
+	trace_sdhci_msm_runtime_resume(mmc_hostname(host->mmc), 0,
+			ktime_to_us(ktime_sub(ktime_get(), start)));
+	return 0;
+}
+
+static int sdhci_msm_suspend(struct device *dev)
+{
+	struct sdhci_host *host = dev_get_drvdata(dev);
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	int ret = 0;
+	int sdio_cfg = 0;
+	ktime_t start = ktime_get();
+
+	if (gpio_is_valid(msm_host->pdata->status_gpio) &&
+		(msm_host->mmc->slot.cd_irq >= 0))
+			disable_irq(msm_host->mmc->slot.cd_irq);
+
+	if (pm_runtime_suspended(dev)) {
+		pr_debug("%s: %s: already runtime suspended\n",
+		mmc_hostname(host->mmc), __func__);
+		goto out;
+	}
+	ret = sdhci_msm_runtime_suspend(dev);
+out:
+	sdhci_msm_disable_controller_clock(host);
+	if (host->mmc->card && mmc_card_sdio(host->mmc->card)) {
+		sdio_cfg = sdhci_msm_cfg_sdio_wakeup(host, true);
+		if (sdio_cfg)
+			sdhci_cfg_irq(host, false, true);
+	}
+
+	trace_sdhci_msm_suspend(mmc_hostname(host->mmc), ret,
+			ktime_to_us(ktime_sub(ktime_get(), start)));
+	return ret;
+}
+
+static int sdhci_msm_resume(struct device *dev)
+{
+	struct sdhci_host *host = dev_get_drvdata(dev);
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	int ret = 0;
+	int sdio_cfg = 0;
+	ktime_t start = ktime_get();
+
+	if (gpio_is_valid(msm_host->pdata->status_gpio) &&
+		(msm_host->mmc->slot.cd_irq >= 0))
+			enable_irq(msm_host->mmc->slot.cd_irq);
+
+	if (pm_runtime_suspended(dev)) {
+		pr_debug("%s: %s: runtime suspended, defer system resume\n",
+		mmc_hostname(host->mmc), __func__);
+		goto out;
+	}
+
+	ret = sdhci_msm_runtime_resume(dev);
+out:
+	if (host->mmc->card && mmc_card_sdio(host->mmc->card)) {
+		sdio_cfg = sdhci_msm_cfg_sdio_wakeup(host, false);
+		if (sdio_cfg)
+			sdhci_cfg_irq(host, true, true);
+	}
+
+	trace_sdhci_msm_resume(mmc_hostname(host->mmc), ret,
+			ktime_to_us(ktime_sub(ktime_get(), start)));
+	return ret;
+}
+
+static int sdhci_msm_suspend_noirq(struct device *dev)
+{
+	struct sdhci_host *host = dev_get_drvdata(dev);
+	struct sdhci_pltfm_host *pltfm_host = sdhci_priv(host);
+	struct sdhci_msm_host *msm_host = pltfm_host->priv;
+	int ret = 0;
+
+	/*
+	 * ksdioirqd may be running, hence retry
+	 * suspend in case the clocks are ON
+	 */
+	if (atomic_read(&msm_host->clks_on)) {
+		pr_warn("%s: %s: clock ON after suspend, aborting suspend\n",
+			mmc_hostname(host->mmc), __func__);
+		ret = -EAGAIN;
+	}
+
+	if (host->mmc->card && mmc_card_sdio(host->mmc->card))
+		if (msm_host->sdio_pending_processing)
+			ret = -EBUSY;
+
+	return ret;
+}
+
+static const struct dev_pm_ops sdhci_msm_pmops = {
+	SET_SYSTEM_SLEEP_PM_OPS(sdhci_msm_suspend, sdhci_msm_resume)
+	SET_RUNTIME_PM_OPS(sdhci_msm_runtime_suspend, sdhci_msm_runtime_resume,
+			   NULL)
+	.suspend_noirq = sdhci_msm_suspend_noirq,
+};
+
+#define SDHCI_MSM_PMOPS (&sdhci_msm_pmops)
+
+#else
+#define SDHCI_MSM_PMOPS NULL
+#endif
 static const struct of_device_id sdhci_msm_dt_match[] = {
 	{.compatible = "qcom,sdhci-msm"},
+	{.compatible = "qcom,sdhci-msm-v5"},
+	{},
 };
 MODULE_DEVICE_TABLE(of, sdhci_msm_dt_match);
 
@@ -3182,6 +4792,7 @@
 		.name	= "sdhci_msm",
 		.owner	= THIS_MODULE,
 		.of_match_table = sdhci_msm_dt_match,
+		.pm	= SDHCI_MSM_PMOPS,
 	},
 };
 
diff --git a/drivers/mmc/host/sdhci-msm.h b/drivers/mmc/host/sdhci-msm.h
new file mode 100644
index 0000000..533b241
--- /dev/null
+++ b/drivers/mmc/host/sdhci-msm.h
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2016-2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __SDHCI_MSM_H__
+#define __SDHCI_MSM_H__
+
+#include <linux/mmc/mmc.h>
+#include <linux/pm_qos.h>
+#include "sdhci-pltfm.h"
+
+/* This structure keeps information per regulator */
+struct sdhci_msm_reg_data {
+	/* voltage regulator handle */
+	struct regulator *reg;
+	/* regulator name */
+	const char *name;
+	/* voltage level to be set */
+	u32 low_vol_level;
+	u32 high_vol_level;
+	/* Load values for low power and high power mode */
+	u32 lpm_uA;
+	u32 hpm_uA;
+
+	/* is this regulator enabled? */
+	bool is_enabled;
+	/* is this regulator needs to be always on? */
+	bool is_always_on;
+	/* is low power mode setting required for this regulator? */
+	bool lpm_sup;
+	bool set_voltage_sup;
+};
+
+/*
+ * This structure keeps information for all the
+ * regulators required for a SDCC slot.
+ */
+struct sdhci_msm_slot_reg_data {
+	/* keeps VDD/VCC regulator info */
+	struct sdhci_msm_reg_data *vdd_data;
+	 /* keeps VDD IO regulator info */
+	struct sdhci_msm_reg_data *vdd_io_data;
+};
+
+struct sdhci_msm_gpio {
+	u32 no;
+	const char *name;
+	bool is_enabled;
+};
+
+struct sdhci_msm_gpio_data {
+	struct sdhci_msm_gpio *gpio;
+	u8 size;
+};
+
+struct sdhci_msm_pin_data {
+	/*
+	 * = 1 if controller pins are using gpios
+	 * = 0 if controller has dedicated MSM pads
+	 */
+	u8 is_gpio;
+	struct sdhci_msm_gpio_data *gpio_data;
+};
+
+struct sdhci_pinctrl_data {
+	struct pinctrl          *pctrl;
+	struct pinctrl_state    *pins_active;
+	struct pinctrl_state    *pins_sleep;
+};
+
+struct sdhci_msm_bus_voting_data {
+	struct msm_bus_scale_pdata *bus_pdata;
+	unsigned int *bw_vecs;
+	unsigned int bw_vecs_size;
+};
+
+struct sdhci_msm_cpu_group_map {
+	int nr_groups;
+	cpumask_t *mask;
+};
+
+struct sdhci_msm_pm_qos_latency {
+	s32 latency[SDHCI_POWER_POLICY_NUM];
+};
+
+struct sdhci_msm_pm_qos_data {
+	struct sdhci_msm_cpu_group_map cpu_group_map;
+	enum pm_qos_req_type irq_req_type;
+	int irq_cpu;
+	struct sdhci_msm_pm_qos_latency irq_latency;
+	struct sdhci_msm_pm_qos_latency *cmdq_latency;
+	struct sdhci_msm_pm_qos_latency *latency;
+	bool irq_valid;
+	bool cmdq_valid;
+	bool legacy_valid;
+};
+
+/*
+ * PM QoS for group voting management - each cpu group defined is associated
+ * with 1 instance of this structure.
+ */
+struct sdhci_msm_pm_qos_group {
+	struct pm_qos_request req;
+	struct delayed_work unvote_work;
+	atomic_t counter;
+	s32 latency;
+};
+
+/* PM QoS HW IRQ voting */
+struct sdhci_msm_pm_qos_irq {
+	struct pm_qos_request req;
+	struct delayed_work unvote_work;
+	struct device_attribute enable_attr;
+	struct device_attribute status_attr;
+	atomic_t counter;
+	s32 latency;
+	bool enabled;
+};
+
+struct sdhci_msm_pltfm_data {
+	/* Supported UHS-I Modes */
+	u32 caps;
+
+	/* More capabilities */
+	u32 caps2;
+
+	unsigned long mmc_bus_width;
+	struct sdhci_msm_slot_reg_data *vreg_data;
+	bool nonremovable;
+	bool nonhotplug;
+	bool largeaddressbus;
+	bool pin_cfg_sts;
+	struct sdhci_msm_pin_data *pin_data;
+	struct sdhci_pinctrl_data *pctrl_data;
+	int status_gpio; /* card detection GPIO that is configured as IRQ */
+	struct sdhci_msm_bus_voting_data *voting_data;
+	u32 *sup_clk_table;
+	unsigned char sup_clk_cnt;
+	int sdiowakeup_irq;
+	u32 *sup_ice_clk_table;
+	unsigned char sup_ice_clk_cnt;
+	struct sdhci_msm_pm_qos_data pm_qos_data;
+	bool core_3_0v_support;
+};
+
+struct sdhci_msm_bus_vote {
+	uint32_t client_handle;
+	uint32_t curr_vote;
+	int min_bw_vote;
+	int max_bw_vote;
+	bool is_max_bw_needed;
+	struct delayed_work vote_work;
+	struct device_attribute max_bus_bw;
+};
+
+struct sdhci_msm_ice_data {
+	struct qcom_ice_variant_ops *vops;
+	struct completion async_done;
+	struct platform_device *pdev;
+	int state;
+};
+
+struct sdhci_msm_host {
+	struct platform_device	*pdev;
+	void __iomem *core_mem;    /* MSM SDCC mapped address */
+	void __iomem *cryptoio;    /* ICE HCI mapped address */
+	bool ice_hci_support;
+	int	pwr_irq;	/* power irq */
+	struct clk	 *clk;     /* main SD/MMC bus clock */
+	struct clk	 *pclk;    /* SDHC peripheral bus clock */
+	struct clk	 *bus_clk; /* SDHC bus voter clock */
+	struct clk	 *ff_clk; /* CDC calibration fixed feedback clock */
+	struct clk	 *sleep_clk; /* CDC calibration sleep clock */
+	struct clk	 *ice_clk; /* SDHC peripheral ICE clock */
+	atomic_t clks_on; /* Set if clocks are enabled */
+	struct sdhci_msm_pltfm_data *pdata;
+	struct mmc_host  *mmc;
+	struct sdhci_pltfm_data sdhci_msm_pdata;
+	u32 curr_pwr_state;
+	u32 curr_io_level;
+	struct completion pwr_irq_completion;
+	struct sdhci_msm_bus_vote msm_bus_vote;
+	struct device_attribute	polling;
+	u32 clk_rate; /* Keeps track of current clock rate that is set */
+	bool tuning_done;
+	bool calibration_done;
+	u8 saved_tuning_phase;
+	bool en_auto_cmd21;
+	struct device_attribute auto_cmd21_attr;
+	bool is_sdiowakeup_enabled;
+	bool sdio_pending_processing;
+	atomic_t controller_clock;
+	bool use_cdclp533;
+	bool use_updated_dll_reset;
+	bool use_14lpp_dll;
+	bool enhanced_strobe;
+	bool rclk_delay_fix;
+	u32 caps_0;
+	struct sdhci_msm_ice_data ice;
+	u32 ice_clk_rate;
+	struct sdhci_msm_pm_qos_group *pm_qos;
+	int pm_qos_prev_cpu;
+	struct device_attribute pm_qos_group_enable_attr;
+	struct device_attribute pm_qos_group_status_attr;
+	bool pm_qos_group_enable;
+	struct sdhci_msm_pm_qos_irq pm_qos_irq;
+	bool tuning_in_progress;
+	bool mci_removed;
+	const struct sdhci_msm_offset *offset;
+};
+
+extern char *saved_command_line;
+
+void sdhci_msm_pm_qos_irq_init(struct sdhci_host *host);
+void sdhci_msm_pm_qos_irq_vote(struct sdhci_host *host);
+void sdhci_msm_pm_qos_irq_unvote(struct sdhci_host *host, bool async);
+
+void sdhci_msm_pm_qos_cpu_init(struct sdhci_host *host,
+		struct sdhci_msm_pm_qos_latency *latency);
+void sdhci_msm_pm_qos_cpu_vote(struct sdhci_host *host,
+		struct sdhci_msm_pm_qos_latency *latency, int cpu);
+bool sdhci_msm_pm_qos_cpu_unvote(struct sdhci_host *host, int cpu, bool async);
+
+
+#endif /* __SDHCI_MSM_H__ */
diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c
index aa12482..47096fd 100644
--- a/drivers/mmc/host/sdhci.c
+++ b/drivers/mmc/host/sdhci.c
@@ -35,6 +35,7 @@
 #include <trace/events/mmc.h>
 
 #include "sdhci.h"
+#include "cmdq_hci.h"
 
 #define DRIVER_NAME "sdhci"
 
@@ -43,6 +44,9 @@
 
 #define MAX_TUNING_LOOP 40
 
+#define SDHCI_DBG_DUMP_RS_INTERVAL (10 * HZ)
+#define SDHCI_DBG_DUMP_RS_BURST 2
+
 static unsigned int debug_quirks = 0;
 static unsigned int debug_quirks2;
 
@@ -50,25 +54,19 @@
 
 static bool sdhci_check_state(struct sdhci_host *);
 
-static void sdhci_enable_preset_value(struct sdhci_host *host, bool enable);
+static void sdhci_enable_sdio_irq_nolock(struct sdhci_host *host, int enable);
 
-#ifdef CONFIG_PM
-static int sdhci_runtime_pm_put(struct sdhci_host *host);
-#else
-static inline int sdhci_runtime_pm_put(struct sdhci_host *host)
-{
-	return 0;
-}
-#endif
+static void sdhci_enable_preset_value(struct sdhci_host *host, bool enable);
 
 static void sdhci_dump_state(struct sdhci_host *host)
 {
 	struct mmc_host *mmc = host->mmc;
 
 	#ifdef CONFIG_MMC_CLKGATE
-	pr_info("%s: clk: %d clk-gated: %d claimer: %s pwr: %d\n",
+	pr_info("%s: clk: %d clk-gated: %d claimer: %s pwr: %d host->irq = %d\n",
 		mmc_hostname(mmc), host->clock, mmc->clk_gated,
-		mmc->claimer->comm, host->pwr);
+		mmc->claimer->comm, host->pwr,
+		(host->flags & SDHCI_HOST_IRQ_STATUS));
 	#else
 	pr_info("%s: clk: %d claimer: %s pwr: %d\n",
 		mmc_hostname(mmc), host->clock,
@@ -82,6 +80,17 @@
 
 static void sdhci_dumpregs(struct sdhci_host *host)
 {
+	MMC_TRACE(host->mmc,
+		"%s: 0x04=0x%08x 0x06=0x%08x 0x0E=0x%08x 0x30=0x%08x 0x34=0x%08x 0x38=0x%08x\n",
+		__func__,
+		sdhci_readw(host, SDHCI_BLOCK_SIZE),
+		sdhci_readw(host, SDHCI_BLOCK_COUNT),
+		sdhci_readw(host, SDHCI_COMMAND),
+		sdhci_readl(host, SDHCI_INT_STATUS),
+		sdhci_readl(host, SDHCI_INT_ENABLE),
+		sdhci_readl(host, SDHCI_SIGNAL_ENABLE));
+	mmc_stop_tracing(host->mmc);
+
 	pr_info(DRIVER_NAME ": =========== REGISTER DUMP (%s)===========\n",
 		mmc_hostname(host->mmc));
 
@@ -145,33 +154,6 @@
 	pr_info(DRIVER_NAME ": ===========================================\n");
 }
 
-#define MAX_PM_QOS_TIMEOUT_VALUE	100000 /* 100 ms */
-static ssize_t
-show_sdhci_pm_qos_tout(struct device *dev, struct device_attribute *attr,
-			char *buf)
-{
-	struct sdhci_host *host = dev_get_drvdata(dev);
-
-	return snprintf(buf, PAGE_SIZE, "%d us\n", host->pm_qos_timeout_us);
-}
-
-static ssize_t
-store_sdhci_pm_qos_tout(struct device *dev, struct device_attribute *attr,
-		const char *buf, size_t count)
-{
-	struct sdhci_host *host = dev_get_drvdata(dev);
-	uint32_t value;
-	unsigned long flags;
-
-	if (!kstrtou32(buf, 0, &value)) {
-		spin_lock_irqsave(&host->lock, flags);
-		if (value <= MAX_PM_QOS_TIMEOUT_VALUE)
-			host->pm_qos_timeout_us = value;
-		spin_unlock_irqrestore(&host->lock, flags);
-	}
-	return count;
-}
-
 /*****************************************************************************\
  *                                                                           *
  * Low level functions                                                       *
@@ -235,6 +217,7 @@
 {
 	unsigned long timeout;
 
+retry_reset:
 	sdhci_writeb(host, mask, SDHCI_SOFTWARE_RESET);
 
 	if (mask & SDHCI_RESET_ALL) {
@@ -260,12 +243,43 @@
 		if (timeout == 0) {
 			pr_err("%s: Reset 0x%x never completed.\n",
 				mmc_hostname(host->mmc), (int)mask);
+			if ((host->quirks2 & SDHCI_QUIRK2_USE_RESET_WORKAROUND)
+				&& host->ops->reset_workaround) {
+				if (!host->reset_wa_applied) {
+					/*
+					 * apply the workaround and issue
+					 * reset again.
+					 */
+					host->ops->reset_workaround(host, 1);
+					host->reset_wa_applied = 1;
+					host->reset_wa_cnt++;
+					goto retry_reset;
+				} else {
+					pr_err("%s: Reset 0x%x failed with workaround\n",
+						mmc_hostname(host->mmc),
+						(int)mask);
+					/* clear the workaround */
+					host->ops->reset_workaround(host, 0);
+					host->reset_wa_applied = 0;
+				}
+			}
+
 			sdhci_dumpregs(host);
 			return;
 		}
 		timeout--;
-		mdelay(1);
+		udelay(1);
 	}
+
+	if ((host->quirks2 & SDHCI_QUIRK2_USE_RESET_WORKAROUND) &&
+			host->ops->reset_workaround && host->reset_wa_applied) {
+		pr_info("%s: Reset 0x%x successful with workaround\n",
+				mmc_hostname(host->mmc), (int)mask);
+		/* clear the workaround */
+		host->ops->reset_workaround(host, 0);
+		host->reset_wa_applied = 0;
+	}
+
 }
 EXPORT_SYMBOL_GPL(sdhci_reset);
 
@@ -881,6 +895,10 @@
 
 		host->flags |= SDHCI_REQ_USE_DMA;
 
+		if ((host->quirks2 & SDHCI_QUIRK2_USE_PIO_FOR_EMMC_TUNING) &&
+			cmd->opcode == MMC_SEND_TUNING_BLOCK_HS200)
+			host->flags &= ~SDHCI_REQ_USE_DMA;
+
 		/*
 		 * FIXME: This doesn't account for merging when mapping the
 		 * scatterlist.
@@ -987,6 +1005,11 @@
 	/* Set the DMA boundary value and block size */
 	sdhci_set_blk_size_reg(host, data->blksz, SDHCI_DEFAULT_BOUNDARY_ARG);
 	sdhci_writew(host, data->blocks, SDHCI_BLOCK_COUNT);
+	MMC_TRACE(host->mmc,
+		"%s: 0x28=0x%08x 0x3E=0x%08x 0x06=0x%08x\n", __func__,
+		sdhci_readb(host, SDHCI_HOST_CONTROL),
+		sdhci_readw(host, SDHCI_HOST_CONTROL2),
+		sdhci_readw(host, SDHCI_BLOCK_COUNT));
 }
 
 static inline bool sdhci_auto_cmd12(struct sdhci_host *host,
@@ -1037,8 +1060,14 @@
 
 	if (data->flags & MMC_DATA_READ) {
 		mode |= SDHCI_TRNS_READ;
-		if (host->ops->toggle_cdr)
-			host->ops->toggle_cdr(host, true);
+		if (host->ops->toggle_cdr) {
+			if ((cmd->opcode == MMC_SEND_TUNING_BLOCK_HS200) ||
+				(cmd->opcode == MMC_SEND_TUNING_BLOCK_HS400) ||
+				(cmd->opcode == MMC_SEND_TUNING_BLOCK))
+				host->ops->toggle_cdr(host, false);
+			else
+				host->ops->toggle_cdr(host, true);
+		}
 	}
 	if (host->ops->toggle_cdr && (data->flags & MMC_DATA_WRITE))
 		host->ops->toggle_cdr(host, false);
@@ -1046,6 +1075,9 @@
 		mode |= SDHCI_TRNS_DMA;
 
 	sdhci_writew(host, mode, SDHCI_TRANSFER_MODE);
+	MMC_TRACE(host->mmc, "%s: 0x00=0x%08x 0x0C=0x%08x\n", __func__,
+		sdhci_readw(host, SDHCI_ARGUMENT2),
+		sdhci_readw(host, SDHCI_TRANSFER_MODE));
 }
 
 static bool sdhci_needs_reset(struct sdhci_host *host, struct mmc_request *mrq)
@@ -1106,6 +1138,8 @@
 	host->data = NULL;
 	host->data_cmd = NULL;
 
+	MMC_TRACE(host->mmc, "%s: 0x24=0x%08x\n", __func__,
+		sdhci_readl(host, SDHCI_PRESENT_STATE));
 	if ((host->flags & (SDHCI_REQ_USE_DMA | SDHCI_USE_ADMA)) ==
 	    (SDHCI_REQ_USE_DMA | SDHCI_USE_ADMA))
 		sdhci_adma_table_post(host, data);
@@ -1264,6 +1298,11 @@
 	if (cmd->data)
 		host->data_start_time = ktime_get();
 	trace_mmc_cmd_rw_start(cmd->opcode, cmd->arg, cmd->flags);
+	MMC_TRACE(host->mmc,
+		"%s: updated 0x8=0x%08x 0xC=0x%08x 0xE=0x%08x\n", __func__,
+		sdhci_readl(host, SDHCI_ARGUMENT),
+		sdhci_readw(host, SDHCI_TRANSFER_MODE),
+		sdhci_readw(host, SDHCI_COMMAND));
 	sdhci_writew(host, SDHCI_MAKE_CMD(cmd->opcode, flags), SDHCI_COMMAND);
 }
 EXPORT_SYMBOL_GPL(sdhci_send_command);
@@ -1286,8 +1325,14 @@
 						sdhci_readb(host,
 						SDHCI_RESPONSE + (3-i)*4-1);
 			}
+			MMC_TRACE(host->mmc,
+			"%s: resp 0: 0x%08x resp 1: 0x%08x resp 2: 0x%08x resp 3: 0x%08x\n",
+			__func__, cmd->resp[0], cmd->resp[1],
+			cmd->resp[2], cmd->resp[3]);
 		} else {
 			cmd->resp[0] = sdhci_readl(host, SDHCI_RESPONSE);
+			MMC_TRACE(host->mmc, "%s: resp 0: 0x%08x\n",
+				__func__, cmd->resp[0]);
 		}
 	}
 
@@ -1609,9 +1654,6 @@
 {
 	struct sdhci_host *host = mmc_priv(mmc);
 
-	if (host->cpu_dma_latency_us)
-		pm_qos_update_request(&host->pm_qos_req_dma,
-					host->cpu_dma_latency_us);
 	if (host->ops->platform_bus_voting)
 		host->ops->platform_bus_voting(host, 1);
 
@@ -1622,29 +1664,26 @@
 {
 	struct sdhci_host *host = mmc_priv(mmc);
 
-	if (host->cpu_dma_latency_us) {
-		/*
-		 * In performance mode, release QoS vote after a timeout to
-		 * make sure back-to-back requests don't suffer from latencies
-		 * that are involved to wake CPU from low power modes in cases
-		 * where the CPU goes into low power mode as soon as QoS vote is
-		 * released.
-		 */
-		if (host->power_policy == SDHCI_PERFORMANCE_MODE)
-			pm_qos_update_request_timeout(&host->pm_qos_req_dma,
-					host->cpu_dma_latency_us,
-					host->pm_qos_timeout_us);
-		else
-			pm_qos_update_request(&host->pm_qos_req_dma,
-					PM_QOS_DEFAULT_VALUE);
-	}
-
 	if (host->ops->platform_bus_voting)
 		host->ops->platform_bus_voting(host, 0);
 
 	return 0;
 }
 
+static void sdhci_notify_halt(struct mmc_host *mmc, bool halt)
+{
+	struct sdhci_host *host = mmc_priv(mmc);
+
+	pr_debug("%s: halt notification was sent, halt=%d\n",
+		mmc_hostname(mmc), halt);
+	if (host->flags & SDHCI_USE_64_BIT_DMA) {
+		if (halt)
+			host->desc_sz = 16;
+		else
+			host->desc_sz = SDHCI_ADMA2_64_DESC_SZ;
+	}
+}
+
 static inline void sdhci_update_power_policy(struct sdhci_host *host,
 		enum sdhci_power_policy policy)
 {
@@ -1668,6 +1707,9 @@
 		break;
 	}
 
+	if (host->ops->notify_load)
+		err = host->ops->notify_load(host, state);
+
 	return err;
 }
 
@@ -1721,8 +1763,9 @@
 		mrq->cmd->error = -EIO;
 		if (mrq->data)
 			mrq->data->error = -EIO;
+		host->mrq = NULL;
+		sdhci_dumpregs(host);
 		mmc_request_done(host->mmc, mrq);
-		sdhci_runtime_pm_put(host);
 		return;
 	}
 
@@ -1745,7 +1788,8 @@
 
 	spin_lock_irqsave(&host->lock, flags);
 
-	sdhci_led_activate(host);
+	if (!(host->quirks2 & SDHCI_QUIRK2_BROKEN_LED_CONTROL))
+		sdhci_led_activate(host);
 
 	/*
 	 * Ensure we don't send the STOP for non-SET_BLOCK_COUNTED
@@ -1827,6 +1871,21 @@
 }
 EXPORT_SYMBOL_GPL(sdhci_set_uhs_signaling);
 
+void sdhci_cfg_irq(struct sdhci_host *host, bool enable, bool sync)
+{
+	if (enable && !(host->flags & SDHCI_HOST_IRQ_STATUS)) {
+		enable_irq(host->irq);
+		host->flags |= SDHCI_HOST_IRQ_STATUS;
+	} else if (!enable && (host->flags & SDHCI_HOST_IRQ_STATUS)) {
+		if (sync)
+			disable_irq(host->irq);
+		else
+			disable_irq_nosync(host->irq);
+		host->flags &= ~SDHCI_HOST_IRQ_STATUS;
+	}
+}
+EXPORT_SYMBOL(sdhci_cfg_irq);
+
 static void sdhci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios)
 {
 	struct sdhci_host *host = mmc_priv(mmc);
@@ -1846,6 +1905,34 @@
 		!(host->quirks2 & SDHCI_QUIRK2_PRESET_VALUE_BROKEN))
 		sdhci_enable_preset_value(host, false);
 
+	spin_lock_irqsave(&host->lock, flags);
+	if (host->mmc && host->mmc->card &&
+			mmc_card_sdio(host->mmc->card))
+		sdhci_cfg_irq(host, false, false);
+
+	if (ios->clock &&
+	    ((ios->clock != host->clock) || (ios->timing != host->timing))) {
+		spin_unlock_irqrestore(&host->lock, flags);
+		host->ops->set_clock(host, ios->clock);
+		spin_lock_irqsave(&host->lock, flags);
+		host->clock = ios->clock;
+
+		if (host->quirks & SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK &&
+		    host->clock) {
+			host->timeout_clk = host->mmc->actual_clock ?
+						host->mmc->actual_clock / 1000 :
+						host->clock / 1000;
+			host->mmc->max_busy_timeout =
+				host->ops->get_max_timeout_count ?
+				host->ops->get_max_timeout_count(host) :
+				1 << 27;
+			host->mmc->max_busy_timeout /= host->timeout_clk;
+		}
+	}
+	if (ios->clock && host->sdio_irq_async_status)
+		sdhci_enable_sdio_irq_nolock(host, false);
+	spin_unlock_irqrestore(&host->lock, flags);
+
 	/*
 	 * The controller clocks may be off during power-up and we may end up
 	 * enabling card clock before giving power to the card. Hence, during
@@ -1866,23 +1953,12 @@
 	}
 
 	spin_lock_irqsave(&host->lock, flags);
-	if (!ios->clock || ios->clock != host->clock) {
+	if (!host->clock) {
+		if (host->mmc && host->mmc->card &&
+				mmc_card_sdio(host->mmc->card))
+			sdhci_cfg_irq(host, true, false);
 		spin_unlock_irqrestore(&host->lock, flags);
-		host->ops->set_clock(host, ios->clock);
-		spin_lock_irqsave(&host->lock, flags);
-		host->clock = ios->clock;
-
-		if (host->quirks & SDHCI_QUIRK_DATA_TIMEOUT_USES_SDCLK &&
-		    host->clock) {
-			host->timeout_clk = host->mmc->actual_clock ?
-						host->mmc->actual_clock / 1000 :
-						host->clock / 1000;
-			host->mmc->max_busy_timeout =
-				host->ops->get_max_timeout_count ?
-				host->ops->get_max_timeout_count(host) :
-				1 << 27;
-			host->mmc->max_busy_timeout /= host->timeout_clk;
-		}
+		return;
 	}
 	spin_unlock_irqrestore(&host->lock, flags);
 
@@ -1962,8 +2038,11 @@
 			sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
 
 			/* Re-enable SD Clock */
-			if (ios->clock)
+			if (ios->clock) {
+				spin_unlock_irqrestore(&host->lock, flags);
 				host->ops->set_clock(host, host->clock);
+				spin_lock_irqsave(&host->lock, flags);
+			}
 		}
 
 		/* Reset SD Clock Enable */
@@ -1990,8 +2069,11 @@
 		}
 
 		/* Re-enable SD Clock */
-		if (ios->clock)
+		if (ios->clock) {
+			spin_unlock_irqrestore(&host->lock, flags);
 			host->ops->set_clock(host, host->clock);
+			spin_lock_irqsave(&host->lock, flags);
+		}
 	} else
 		sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
 
@@ -2014,7 +2096,13 @@
 		sdhci_set_power(host, ios->power_mode, ios->vdd);
 	}
 	if (!ios->clock)
-		sdhci_set_clock(host, ios->clock);
+		host->ops->set_clock(host, ios->clock);
+
+	spin_lock_irqsave(&host->lock, flags);
+	if (host->mmc && host->mmc->card &&
+			mmc_card_sdio(host->mmc->card))
+		sdhci_cfg_irq(host, true, false);
+	spin_unlock_irqrestore(&host->lock, flags);
 
 	mmiowb();
 }
@@ -2099,16 +2187,28 @@
 
 static void sdhci_enable_sdio_irq_nolock(struct sdhci_host *host, int enable)
 {
-	if (!(host->flags & SDHCI_DEVICE_DEAD)) {
-		if (enable)
-			host->ier |= SDHCI_INT_CARD_INT;
-		else
-			host->ier &= ~SDHCI_INT_CARD_INT;
+	u16 ctrl = 0;
 
-		sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
-		sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
-		mmiowb();
+	if (host->flags & SDHCI_DEVICE_DEAD)
+		return;
+
+	if (mmc_card_and_host_support_async_int(host->mmc)) {
+		ctrl = sdhci_readw(host, SDHCI_HOST_CONTROL2);
+		if (enable)
+			ctrl |= SDHCI_CTRL_ASYNC_INT_ENABLE;
+		else
+			ctrl &= ~SDHCI_CTRL_ASYNC_INT_ENABLE;
+		sdhci_writew(host, ctrl, SDHCI_HOST_CONTROL2);
 	}
+
+	if (enable)
+		host->ier |= SDHCI_INT_CARD_INT;
+	else
+		host->ier &= ~SDHCI_INT_CARD_INT;
+
+	sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
+	sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
+	mmiowb();
 }
 
 static void sdhci_enable_sdio_irq(struct mmc_host *mmc, int enable)
@@ -2253,6 +2353,17 @@
 	return 0;
 }
 
+static int sdhci_enhanced_strobe(struct mmc_host *mmc)
+{
+	struct sdhci_host *host = mmc_priv(mmc);
+	int err = 0;
+
+	if (host->ops->enhanced_strobe)
+		err = host->ops->enhanced_strobe(host);
+
+	return err;
+}
+
 static int sdhci_execute_tuning(struct mmc_host *mmc, u32 opcode)
 {
 	struct sdhci_host *host = mmc_priv(mmc);
@@ -2281,9 +2392,10 @@
 	switch (host->timing) {
 	/* HS400 tuning is done in HS200 mode */
 	case MMC_TIMING_MMC_HS400:
-		err = -EINVAL;
-		goto out_unlock;
-
+		if (!(mmc->caps2 & MMC_CAP2_HS400_POST_TUNING)) {
+			err = -EINVAL;
+			goto out_unlock;
+		}
 	case MMC_TIMING_MMC_HS200:
 		/*
 		 * Periodic re-tuning for HS400 is not expected to be needed, so
@@ -2517,6 +2629,9 @@
 			       DMA_TO_DEVICE : DMA_FROM_DEVICE);
 
 	data->host_cookie = COOKIE_UNMAPPED;
+
+	if (host->ops->pre_req)
+		host->ops->pre_req(host, mrq);
 }
 
 static void sdhci_pre_req(struct mmc_host *mmc, struct mmc_request *mrq,
@@ -2578,7 +2693,27 @@
 	spin_unlock_irqrestore(&host->lock, flags);
 }
 
+static int sdhci_late_init(struct mmc_host *mmc)
+{
+	struct sdhci_host *host = mmc_priv(mmc);
+
+	if (host->ops->init)
+		host->ops->init(host);
+
+	return 0;
+}
+
+static void sdhci_force_err_irq(struct mmc_host *mmc, u64 errmask)
+{
+	struct sdhci_host *host = mmc_priv(mmc);
+	u16 mask = errmask & 0xFFFF;
+
+	pr_err("%s: Force raise error mask:0x%04x\n", __func__, mask);
+	sdhci_writew(host, mask, SDHCI_SET_INT_ERROR);
+}
+
 static const struct mmc_host_ops sdhci_ops = {
+	.init           = sdhci_late_init,
 	.request	= sdhci_request,
 	.post_req	= sdhci_post_req,
 	.pre_req	= sdhci_pre_req,
@@ -2590,12 +2725,15 @@
 	.start_signal_voltage_switch	= sdhci_start_signal_voltage_switch,
 	.prepare_hs400_tuning		= sdhci_prepare_hs400_tuning,
 	.execute_tuning			= sdhci_execute_tuning,
+	.enhanced_strobe		= sdhci_enhanced_strobe,
 	.select_drive_strength		= sdhci_select_drive_strength,
 	.card_event			= sdhci_card_event,
 	.card_busy	= sdhci_card_busy,
 	.enable		= sdhci_enable,
 	.disable	= sdhci_disable,
 	.notify_load	= sdhci_notify_load,
+	.notify_halt	= sdhci_notify_halt,
+	.force_err_irq	= sdhci_force_err_irq,
 };
 
 /*****************************************************************************\
@@ -2674,7 +2812,8 @@
 	}
 
 	if (!sdhci_has_requests(host))
-		sdhci_led_deactivate(host);
+		if (!(host->quirks2 & SDHCI_QUIRK2_BROKEN_LED_CONTROL))
+			sdhci_led_deactivate(host);
 
 	host->mrqs_done[i] = NULL;
 	host->auto_cmd_err_sts = 0;
@@ -2789,7 +2928,7 @@
 
 		if (intmask & SDHCI_INT_AUTO_CMD_ERR) {
 			auto_cmd_status = host->auto_cmd_err_sts;
-			pr_err("%s: %s: AUTO CMD err sts 0x%08x\n",
+			pr_err_ratelimited("%s: %s: AUTO CMD err sts 0x%08x\n",
 				mmc_hostname(host->mmc), __func__, auto_cmd_status);
 			if (auto_cmd_status & (SDHCI_AUTO_CMD12_NOT_EXEC |
 					       SDHCI_AUTO_CMD_INDEX_ERR |
@@ -2810,10 +2949,13 @@
 		 * If the card did not receive the command or returned an
 		 * error which prevented it sending data, the data phase
 		 * will time out.
+		 *
+		 * Even in case of cmd INDEX OR ENDBIT error we
+		 * handle it the same way.
 		 */
 		if (host->cmd->data &&
-		    (intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) ==
-		     SDHCI_INT_CRC) {
+		    (((intmask & (SDHCI_INT_CRC | SDHCI_INT_TIMEOUT)) ==
+		     SDHCI_INT_CRC) || (host->cmd->error == -EILSEQ))) {
 			host->cmd = NULL;
 			return;
 		}
@@ -2869,8 +3011,9 @@
 
 	/* CMD19 generates _only_ Buffer Read Ready interrupt */
 	if (intmask & SDHCI_INT_DATA_AVAIL) {
-		if (command == MMC_SEND_TUNING_BLOCK ||
-		    command == MMC_SEND_TUNING_BLOCK_HS200) {
+		if (!(host->quirks2 & SDHCI_QUIRK2_NON_STANDARD_TUNING) &&
+			(command == MMC_SEND_TUNING_BLOCK ||
+			command == MMC_SEND_TUNING_BLOCK_HS200)) {
 			host->tuning_done = 1;
 			wake_up(&host->buf_ready_int);
 			return;
@@ -2940,7 +3083,8 @@
 			host->ops->adma_workaround(host, intmask);
 	}
 	if (host->data->error) {
-		if (intmask & (SDHCI_INT_DATA_CRC | SDHCI_INT_DATA_TIMEOUT)) {
+		if (intmask & (SDHCI_INT_DATA_CRC | SDHCI_INT_DATA_TIMEOUT
+					| SDHCI_INT_DATA_END_BIT)) {
 			command = SDHCI_GET_CMD(sdhci_readw(host,
 							    SDHCI_COMMAND));
 			if ((command != MMC_SEND_TUNING_BLOCK_HS200) &&
@@ -2949,7 +3093,7 @@
 		} else {
 			pr_msg = true;
 		}
-		if (pr_msg) {
+		if (pr_msg && __ratelimit(&host->dbg_dump_rs)) {
 			pr_err("%s: data txfr (0x%08x) error: %d after %lld ms\n",
 			       mmc_hostname(host->mmc), intmask,
 			       host->data->error, ktime_to_ms(ktime_sub(
@@ -3003,6 +3147,58 @@
 	}
 }
 
+#ifdef CONFIG_MMC_CQ_HCI
+static int sdhci_get_cmd_err(u32 intmask)
+{
+	if (intmask & SDHCI_INT_TIMEOUT)
+		return -ETIMEDOUT;
+	else if (intmask & (SDHCI_INT_CRC | SDHCI_INT_END_BIT |
+			    SDHCI_INT_INDEX))
+		return -EILSEQ;
+	return 0;
+}
+
+static int sdhci_get_data_err(u32 intmask)
+{
+	if (intmask & SDHCI_INT_DATA_TIMEOUT)
+		return -ETIMEDOUT;
+	else if (intmask & (SDHCI_INT_DATA_END_BIT | SDHCI_INT_DATA_CRC))
+		return -EILSEQ;
+	else if (intmask & SDHCI_INT_ADMA_ERROR)
+		return -EIO;
+	return 0;
+}
+
+static irqreturn_t sdhci_cmdq_irq(struct sdhci_host *host, u32 intmask)
+{
+	int err = 0;
+	u32 mask = 0;
+	irqreturn_t ret;
+
+	if (intmask & SDHCI_INT_CMD_MASK)
+		err = sdhci_get_cmd_err(intmask);
+	else if (intmask & SDHCI_INT_DATA_MASK)
+		err = sdhci_get_data_err(intmask);
+
+	ret = cmdq_irq(host->mmc, err);
+	if (err) {
+		/* Clear the error interrupts */
+		mask = intmask & SDHCI_INT_ERROR_MASK;
+		sdhci_writel(host, mask, SDHCI_INT_STATUS);
+	}
+	return ret;
+
+}
+
+#else
+static irqreturn_t sdhci_cmdq_irq(struct sdhci_host *host, u32 intmask)
+{
+	pr_err("%s: Received cmdq-irq when disabled !!!!\n",
+		mmc_hostname(host->mmc));
+	return IRQ_NONE;
+}
+#endif
+
 static irqreturn_t sdhci_irq(int irq, void *dev_id)
 {
 	irqreturn_t result = IRQ_NONE;
@@ -3017,6 +3213,31 @@
 		return IRQ_NONE;
 	}
 
+	if (!host->clock && host->mmc->card &&
+			mmc_card_sdio(host->mmc->card)) {
+		if (!mmc_card_and_host_support_async_int(host->mmc)) {
+			spin_unlock(&host->lock);
+			return IRQ_NONE;
+		}
+		/*
+		 * async card interrupt is level sensitive and received
+		 * when clocks are off.
+		 * If sdio card has asserted async interrupt, in that
+		 * case we need to disable host->irq.
+		 * Later we can disable card interrupt and re-enable
+		 * host->irq.
+		 */
+
+		pr_debug("%s: %s: sdio_async intr. received\n",
+				mmc_hostname(host->mmc), __func__);
+		sdhci_cfg_irq(host, false, false);
+		host->sdio_irq_async_status = true;
+		host->thread_isr |= SDHCI_INT_CARD_INT;
+		result = IRQ_WAKE_THREAD;
+		spin_unlock(&host->lock);
+		return result;
+	}
+
 	intmask = sdhci_readl(host, SDHCI_INT_STATUS);
 	if (!intmask || intmask == 0xffffffff) {
 		result = IRQ_NONE;
@@ -3024,6 +3245,19 @@
 	}
 
 	do {
+		if (host->mmc->card && mmc_card_cmdq(host->mmc->card) &&
+		!mmc_host_halt(host->mmc) && !mmc_host_cq_disable(host->mmc)) {
+			pr_debug("*** %s: cmdq intr: 0x%08x\n",
+					mmc_hostname(host->mmc),
+					intmask);
+			result = sdhci_cmdq_irq(host, intmask);
+			if (result == IRQ_HANDLED)
+				goto out;
+		}
+
+		MMC_TRACE(host->mmc,
+			"%s: intmask: 0x%x\n", __func__, intmask);
+
 		if (intmask & SDHCI_INT_AUTO_CMD_ERR)
 			host->auto_cmd_err_sts = sdhci_readw(host,
 					SDHCI_AUTO_CMD_ERR);
@@ -3142,8 +3376,11 @@
 		sdio_run_irqs(host->mmc);
 
 		spin_lock_irqsave(&host->lock, flags);
-		if (host->flags & SDHCI_SDIO_IRQ_ENABLED)
+		if (host->flags & SDHCI_SDIO_IRQ_ENABLED) {
+			if (host->sdio_irq_async_status)
+				host->sdio_irq_async_status = false;
 			sdhci_enable_sdio_irq_nolock(host, true);
+		}
 		spin_unlock_irqrestore(&host->lock, flags);
 	}
 
@@ -3258,12 +3495,6 @@
 
 EXPORT_SYMBOL_GPL(sdhci_resume_host);
 
-static int sdhci_runtime_pm_put(struct sdhci_host *host)
-{
-	pm_runtime_mark_last_busy(host->mmc->parent);
-	return pm_runtime_put_autosuspend(host->mmc->parent);
-}
-
 int sdhci_runtime_suspend_host(struct sdhci_host *host)
 {
 	unsigned long flags;
@@ -3363,30 +3594,180 @@
 	host->flags = SDHCI_SIGNALING_330;
 
 	spin_lock_init(&host->lock);
+	ratelimit_state_init(&host->dbg_dump_rs, SDHCI_DBG_DUMP_RS_INTERVAL,
+			SDHCI_DBG_DUMP_RS_BURST);
 
 	return host;
 }
 
 EXPORT_SYMBOL_GPL(sdhci_alloc_host);
-	
-#ifdef CONFIG_SMP
-static void sdhci_set_pmqos_req_type(struct sdhci_host *host)
-{
 
-	/*
-	 * The default request type PM_QOS_REQ_ALL_CORES is
-	 * applicable to all CPU cores that are online and
-	 * this would have a power impact when there are more
-	 * number of CPUs. This new PM_QOS_REQ_AFFINE_IRQ request
-	 * type shall update/apply the vote only to that CPU to
-	 * which this IRQ's affinity is set to.
-	 */
-	host->pm_qos_req_dma.type = PM_QOS_REQ_AFFINE_IRQ;
-	host->pm_qos_req_dma.irq = host->irq;
+#ifdef CONFIG_MMC_CQ_HCI
+static void sdhci_cmdq_set_transfer_params(struct mmc_host *mmc)
+{
+	struct sdhci_host *host = mmc_priv(mmc);
+	u8 ctrl;
+
+	if (host->version >= SDHCI_SPEC_200) {
+		ctrl = sdhci_readb(host, SDHCI_HOST_CONTROL);
+		ctrl &= ~SDHCI_CTRL_DMA_MASK;
+		if (host->flags & SDHCI_USE_64_BIT_DMA)
+			ctrl |= SDHCI_CTRL_ADMA64;
+		else
+			ctrl |= SDHCI_CTRL_ADMA32;
+		sdhci_writeb(host, ctrl, SDHCI_HOST_CONTROL);
+	}
+	if (host->ops->toggle_cdr)
+		host->ops->toggle_cdr(host, false);
+}
+
+static void sdhci_cmdq_clear_set_irqs(struct mmc_host *mmc, bool clear)
+{
+	struct sdhci_host *host = mmc_priv(mmc);
+	u32 ier = 0;
+
+	ier &= ~SDHCI_INT_ALL_MASK;
+
+	if (clear) {
+		ier = SDHCI_INT_CMDQ_EN | SDHCI_INT_ERROR_MASK;
+		sdhci_writel(host, ier, SDHCI_INT_ENABLE);
+		sdhci_writel(host, ier, SDHCI_SIGNAL_ENABLE);
+	} else {
+		ier = SDHCI_INT_BUS_POWER | SDHCI_INT_DATA_END_BIT |
+			     SDHCI_INT_DATA_CRC | SDHCI_INT_DATA_TIMEOUT |
+			     SDHCI_INT_INDEX | SDHCI_INT_END_BIT |
+			     SDHCI_INT_CRC | SDHCI_INT_TIMEOUT |
+			     SDHCI_INT_DATA_END | SDHCI_INT_RESPONSE |
+			     SDHCI_INT_AUTO_CMD_ERR;
+		sdhci_writel(host, ier, SDHCI_INT_ENABLE);
+		sdhci_writel(host, ier, SDHCI_SIGNAL_ENABLE);
+	}
+}
+
+static void sdhci_cmdq_set_data_timeout(struct mmc_host *mmc, u32 val)
+{
+	struct sdhci_host *host = mmc_priv(mmc);
+
+	sdhci_writeb(host, val, SDHCI_TIMEOUT_CONTROL);
+}
+
+static void sdhci_cmdq_dump_vendor_regs(struct mmc_host *mmc)
+{
+	struct sdhci_host *host = mmc_priv(mmc);
+
+	sdhci_dumpregs(host);
+}
+
+static int sdhci_cmdq_init(struct sdhci_host *host, struct mmc_host *mmc,
+			   bool dma64)
+{
+	return cmdq_init(host->cq_host, mmc, dma64);
+}
+
+static void sdhci_cmdq_set_block_size(struct mmc_host *mmc)
+{
+	struct sdhci_host *host = mmc_priv(mmc);
+
+	sdhci_set_blk_size_reg(host, 512, 0);
+}
+
+static void sdhci_enhanced_strobe_mask(struct mmc_host *mmc, bool set)
+{
+	struct sdhci_host *host = mmc_priv(mmc);
+
+	if (host->ops->enhanced_strobe_mask)
+		host->ops->enhanced_strobe_mask(host, set);
+}
+
+static void sdhci_cmdq_clear_set_dumpregs(struct mmc_host *mmc, bool set)
+{
+	struct sdhci_host *host = mmc_priv(mmc);
+
+	if (host->ops->clear_set_dumpregs)
+		host->ops->clear_set_dumpregs(host, set);
+}
+
+static void sdhci_cmdq_post_cqe_halt(struct mmc_host *mmc)
+{
+	struct sdhci_host *host = mmc_priv(mmc);
+
+	sdhci_writel(host, sdhci_readl(host, SDHCI_INT_ENABLE) |
+			SDHCI_INT_RESPONSE, SDHCI_INT_ENABLE);
+	sdhci_writel(host, SDHCI_INT_RESPONSE, SDHCI_INT_STATUS);
 }
 #else
-static void sdhci_set_pmqos_req_type(struct sdhci_host *host)
+static void sdhci_cmdq_set_transfer_params(struct mmc_host *mmc)
 {
+
+}
+static void sdhci_cmdq_clear_set_irqs(struct mmc_host *mmc, bool clear)
+{
+
+}
+
+static void sdhci_cmdq_set_data_timeout(struct mmc_host *mmc, u32 val)
+{
+
+}
+
+static void sdhci_cmdq_dump_vendor_regs(struct mmc_host *mmc)
+{
+
+}
+
+static int sdhci_cmdq_init(struct sdhci_host *host, struct mmc_host *mmc,
+			   bool dma64)
+{
+	return -ENOSYS;
+}
+
+static void sdhci_cmdq_set_block_size(struct mmc_host *mmc)
+{
+
+}
+
+static void sdhci_enhanced_strobe_mask(struct mmc_host *mmc, bool set)
+{
+
+}
+
+static void sdhci_cmdq_clear_set_dumpregs(struct mmc_host *mmc, bool set)
+{
+
+}
+
+static void sdhci_cmdq_post_cqe_halt(struct mmc_host *mmc)
+{
+}
+#endif
+
+static const struct cmdq_host_ops sdhci_cmdq_ops = {
+	.clear_set_irqs = sdhci_cmdq_clear_set_irqs,
+	.set_data_timeout = sdhci_cmdq_set_data_timeout,
+	.dump_vendor_regs = sdhci_cmdq_dump_vendor_regs,
+	.set_block_size = sdhci_cmdq_set_block_size,
+	.clear_set_dumpregs = sdhci_cmdq_clear_set_dumpregs,
+	.enhanced_strobe_mask = sdhci_enhanced_strobe_mask,
+	.post_cqe_halt = sdhci_cmdq_post_cqe_halt,
+	.set_transfer_params = sdhci_cmdq_set_transfer_params,
+};
+
+#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
+static int sdhci_is_adma2_64bit(struct sdhci_host *host)
+{
+	u32 caps;
+
+	caps = (host->quirks & SDHCI_QUIRK_MISSING_CAPS) ? host->caps :
+		sdhci_readl(host, SDHCI_CAPABILITIES);
+
+	if (caps & SDHCI_CAN_64BIT)
+		return 1;
+	return 0;
+}
+#else
+static int sdhci_is_adma2_64bit(struct sdhci_host *host)
+{
+	return 0;
 }
 #endif
 
@@ -3455,6 +3836,7 @@
 int sdhci_setup_host(struct sdhci_host *host)
 {
 	struct mmc_host *mmc;
+	u32 caps[2] = {0, 0};
 	u32 max_current_caps;
 	unsigned int ocr_avail;
 	unsigned int override_timeout_clk;
@@ -3479,6 +3861,8 @@
 
 	sdhci_read_caps(host);
 
+	caps[0] = host->caps;
+
 	override_timeout_clk = host->timeout_clk;
 
 	if (host->version > SDHCI_SPEC_300) {
@@ -3516,7 +3900,7 @@
 	 * SDHCI_QUIRK2_BROKEN_64_BIT_DMA must be left to the drivers to
 	 * implement.
 	 */
-	if (host->caps & SDHCI_CAN_64BIT)
+	if (sdhci_is_adma2_64bit(host))
 		host->flags |= SDHCI_USE_64_BIT_DMA;
 
 	if (host->flags & (SDHCI_USE_SDMA | SDHCI_USE_ADMA)) {
@@ -3674,6 +4058,9 @@
 	mmc->caps |= MMC_CAP_SDIO_IRQ | MMC_CAP_ERASE | MMC_CAP_CMD23;
 	mmc->caps2 |= MMC_CAP2_SDIO_IRQ_NOTHREAD;
 
+	if (caps[0] & SDHCI_CAN_ASYNC_INT)
+		mmc->caps2 |= MMC_CAP2_ASYNC_SDIO_IRQ_4BIT_MODE;
+
 	if (host->quirks & SDHCI_QUIRK_MULTIBLOCK_READ_ACMD12)
 		host->flags |= SDHCI_AUTO_CMD12;
 
@@ -3798,10 +4185,15 @@
 	 * value.
 	 */
 	max_current_caps = sdhci_readl(host, SDHCI_MAX_CURRENT);
-	if (!max_current_caps && !IS_ERR(mmc->supply.vmmc)) {
-		int curr = regulator_get_current_limit(mmc->supply.vmmc);
-		if (curr > 0) {
+	if (!max_current_caps) {
+		u32 curr = 0;
 
+		if (!IS_ERR(mmc->supply.vmmc))
+			curr = regulator_get_current_limit(mmc->supply.vmmc);
+		else if (host->ops->get_current_limit)
+			curr = host->ops->get_current_limit(host);
+
+		if (curr > 0) {
 			/* convert to SDHCI_MAX_CURRENT format */
 			curr = curr/1000;  /* convert to mA */
 			curr = curr/SDHCI_MAX_CURRENT_MULTIPLIER;
@@ -3965,6 +4357,8 @@
 
 	init_waitqueue_head(&host->buf_ready_int);
 
+	host->flags |= SDHCI_HOST_IRQ_STATUS;
+
 	sdhci_init(host, 0);
 
 	ret = request_threaded_irq(host->irq, sdhci_irq, sdhci_thread_irq,
@@ -3979,55 +4373,54 @@
 	sdhci_dumpregs(host);
 #endif
 
-	ret = sdhci_led_register(host);
-	if (ret) {
-		pr_err("%s: Failed to register LED device: %d\n",
-		       mmc_hostname(mmc), ret);
-		goto unirq;
+	if (!(host->quirks2 & SDHCI_QUIRK2_BROKEN_LED_CONTROL)) {
+		ret = sdhci_led_register(host);
+		if (ret) {
+			pr_err("%s: Failed to register LED device: %d\n",
+			       mmc_hostname(mmc), ret);
+			goto unirq;
+		}
 	}
 
 	mmiowb();
 
-	if (host->cpu_dma_latency_us) {
-		host->pm_qos_timeout_us = 10000; /* default value */
-		sdhci_set_pmqos_req_type(host);
-		pm_qos_add_request(&host->pm_qos_req_dma,
-				PM_QOS_CPU_DMA_LATENCY, PM_QOS_DEFAULT_VALUE);
-
-		host->pm_qos_tout.show = show_sdhci_pm_qos_tout;
-		host->pm_qos_tout.store = store_sdhci_pm_qos_tout;
-		sysfs_attr_init(&host->pm_qos_tout.attr);
-		host->pm_qos_tout.attr.name = "pm_qos_unvote_delay";
-		host->pm_qos_tout.attr.mode = S_IRUGO | S_IWUSR;
-		ret = device_create_file(mmc_dev(mmc), &host->pm_qos_tout);
-		if (ret)
-			pr_err("%s: cannot create pm_qos_unvote_delay %d\n",
-					mmc_hostname(mmc), ret);
-
-	}
-
-	ret = mmc_add_host(mmc);
-	if (ret)
-		goto unled;
-
 	if (host->quirks2 & SDHCI_QUIRK2_IGN_DATA_END_BIT_ERROR) {
 		host->ier = (host->ier & ~SDHCI_INT_DATA_END_BIT);
 		sdhci_writel(host, host->ier, SDHCI_INT_ENABLE);
 		sdhci_writel(host, host->ier, SDHCI_SIGNAL_ENABLE);
 	}
 
-	pr_info("%s: SDHCI controller on %s [%s] using %s\n",
-		mmc_hostname(mmc), host->hw_name, dev_name(mmc_dev(mmc)),
+	if (mmc->caps2 &  MMC_CAP2_CMD_QUEUE) {
+		bool dma64 = (host->flags & SDHCI_USE_64_BIT_DMA) ?
+			true : false;
+		ret = sdhci_cmdq_init(host, mmc, dma64);
+		if (ret)
+			pr_err("%s: CMDQ init: failed (%d)\n",
+			       mmc_hostname(host->mmc), ret);
+		else
+			host->cq_host->ops = &sdhci_cmdq_ops;
+	}
+
+	pr_info("%s: SDHCI controller on %s [%s] using %s in %s mode\n",
+	mmc_hostname(mmc), host->hw_name, dev_name(mmc_dev(mmc)),
 		(host->flags & SDHCI_USE_ADMA) ?
-		(host->flags & SDHCI_USE_64_BIT_DMA) ? "ADMA 64-bit" : "ADMA" :
-		(host->flags & SDHCI_USE_SDMA) ? "DMA" : "PIO");
+		((host->flags & SDHCI_USE_64_BIT_DMA) ?
+		"64-bit ADMA" : "32-bit ADMA") :
+		((host->flags & SDHCI_USE_SDMA) ? "DMA" : "PIO"),
+		((mmc->caps2 &  MMC_CAP2_CMD_QUEUE) && !ret) ?
+		"CMDQ" : "legacy");
 
 	sdhci_enable_card_detection(host);
 
+	ret = mmc_add_host(mmc);
+	if (ret)
+		goto unled;
+
 	return 0;
 
 unled:
-	sdhci_led_unregister(host);
+	if (!(host->quirks2 & SDHCI_QUIRK2_BROKEN_LED_CONTROL))
+		sdhci_led_unregister(host);
 unirq:
 	sdhci_do_reset(host, SDHCI_RESET_ALL);
 	sdhci_writel(host, 0, SDHCI_INT_ENABLE);
@@ -4083,11 +4476,10 @@
 
 	sdhci_disable_card_detection(host);
 
-	if (host->cpu_dma_latency_us)
-		pm_qos_remove_request(&host->pm_qos_req_dma);
 	mmc_remove_host(host->mmc);
 
-	sdhci_led_unregister(host);
+	if (!(host->quirks2 & SDHCI_QUIRK2_BROKEN_LED_CONTROL))
+		sdhci_led_unregister(host);
 
 	if (!dead)
 		sdhci_do_reset(host, SDHCI_RESET_ALL);
diff --git a/drivers/mmc/host/sdhci.h b/drivers/mmc/host/sdhci.h
index 5e809d4..d9e656a 100644
--- a/drivers/mmc/host/sdhci.h
+++ b/drivers/mmc/host/sdhci.h
@@ -17,8 +17,7 @@
 #include <linux/compiler.h>
 #include <linux/types.h>
 #include <linux/io.h>
-#include <linux/pm_qos.h>
-
+#include <linux/ratelimit.h>
 #include <linux/mmc/host.h>
 
 /*
@@ -157,6 +156,8 @@
 		SDHCI_INT_DATA_TIMEOUT | SDHCI_INT_DATA_CRC | \
 		SDHCI_INT_DATA_END_BIT | SDHCI_INT_ADMA_ERROR | \
 		SDHCI_INT_BLK_GAP)
+
+#define SDHCI_INT_CMDQ_EN	(0x1 << 14)
 #define SDHCI_INT_ALL_MASK	((unsigned int)-1)
 
 #define SDHCI_AUTO_CMD_ERR		0x3C
@@ -183,6 +184,7 @@
 #define   SDHCI_CTRL_DRV_TYPE_D		0x0030
 #define  SDHCI_CTRL_EXEC_TUNING		0x0040
 #define  SDHCI_CTRL_TUNED_CLK		0x0080
+#define  SDHCI_CTRL_ASYNC_INT_ENABLE	0x4000
 #define  SDHCI_CTRL_PRESET_VAL_ENABLE	0x8000
 
 #define SDHCI_CAPABILITIES	0x40
@@ -204,6 +206,7 @@
 #define  SDHCI_CAN_VDD_300	0x02000000
 #define  SDHCI_CAN_VDD_180	0x04000000
 #define  SDHCI_CAN_64BIT	0x10000000
+#define  SDHCI_CAN_ASYNC_INT	0x20000000
 
 #define  SDHCI_SUPPORT_SDR50	0x00000001
 #define  SDHCI_SUPPORT_SDR104	0x00000002
@@ -340,6 +343,7 @@
 enum sdhci_power_policy {
 	SDHCI_PERFORMANCE_MODE,
 	SDHCI_POWER_SAVE_MODE,
+	SDHCI_POWER_POLICY_NUM /* Always keep this one last */
 };
 
 struct sdhci_host {
@@ -451,6 +455,11 @@
 */
 #define SDHCI_QUIRK2_SLOW_INT_CLR			(1<<18)
 
+/*
+ * If the base clock can be scalable, then there should be no further
+ * clock dividing as the input clock itself will be scaled down to
+ * required frequency.
+ */
 #define SDHCI_QUIRK2_ALWAYS_USE_BASE_CLOCK		(1<<19)
 
 /*
@@ -459,20 +468,20 @@
  * could be lager than the maximum timeout value that controller
  * can handle.
  */
-#define SDHCI_QUIRK2_IGNORE_DATATOUT_FOR_R1BCMD		(1<<21)
+#define SDHCI_QUIRK2_IGNORE_DATATOUT_FOR_R1BCMD		(1<<20)
 
 /*
  * The preset value registers are not properly initialized by
  * some hardware and hence preset value must not be enabled for
  * such controllers.
  */
-#define SDHCI_QUIRK2_BROKEN_PRESET_VALUE		(1<<22)
+#define SDHCI_QUIRK2_BROKEN_PRESET_VALUE		(1<<21)
 /*
  * Some controllers define the usage of 0xF in data timeout counter
  * register (0x2E) which is actually a reserved bit as per
  * specification.
  */
-#define SDHCI_QUIRK2_USE_RESERVED_MAX_TIMEOUT		(1<<23)
+#define SDHCI_QUIRK2_USE_RESERVED_MAX_TIMEOUT		(1<<22)
 /*
  * This is applicable for controllers that advertize timeout clock
  * value in capabilities register (bit 5-0) as just 50MHz whereas the
@@ -492,6 +501,26 @@
  * 1-bit mode of SDIO.
  */
 #define SDHCI_QUIRK2_IGN_DATA_END_BIT_ERROR             (1<<24)
+
+/* Use reset workaround in case sdhci reset timeouts */
+#define SDHCI_QUIRK2_USE_RESET_WORKAROUND		(1<<26)
+
+/* Some controllers doesn't have have any LED control */
+#define SDHCI_QUIRK2_BROKEN_LED_CONTROL			(1<<27)
+
+/*
+ * Some controllers doesn't follow the tuning procedure as defined in spec.
+ * The tuning data has to be compared from SW driver to validate the correct
+ * phase.
+ */
+#define SDHCI_QUIRK2_NON_STANDARD_TUNING (1 << 28)
+/*
+ * Some controllers may use PIO mode to workaround HW issues in ADMA for
+ * eMMC tuning commands.
+ */
+#define SDHCI_QUIRK2_USE_PIO_FOR_EMMC_TUNING (1 << 23)
+
+
 	int irq;		/* Device IRQ */
 	void __iomem *ioaddr;	/* Mapped address */
 
@@ -501,6 +530,7 @@
 	struct mmc_host *mmc;	/* MMC structure */
 	struct mmc_host_ops mmc_host_ops;	/* MMC host ops */
 	u64 dma_mask;		/* custom DMA mask */
+	u64 coherent_dma_mask;
 
 #if IS_ENABLED(CONFIG_LEDS_CLASS)
 	struct led_classdev led;	/* LED control */
@@ -524,6 +554,7 @@
 #define SDHCI_SIGNALING_330	(1<<14)	/* Host is capable of 3.3V signaling */
 #define SDHCI_SIGNALING_180	(1<<15)	/* Host is capable of 1.8V signaling */
 #define SDHCI_SIGNALING_120	(1<<16)	/* Host is capable of 1.2V signaling */
+#define SDHCI_HOST_IRQ_STATUS	 (1<<17) /* host->irq status */
 
 	unsigned int version;	/* SDHCI spec. version */
 
@@ -557,10 +588,6 @@
 	size_t adma_table_sz;	/* ADMA descriptor table size */
 	size_t align_buffer_sz;	/* Bounce buffer size */
 
-	unsigned int adma_desc_sz; /* ADMA descriptor table size */
-	unsigned int align_buf_sz; /* Bounce buffer size */
-	unsigned int adma_max_desc; /* Max ADMA descriptos (max sg segments) */
-
 	dma_addr_t adma_addr;	/* Mapped ADMA descr. table */
 	dma_addr_t align_addr;	/* Mapped bounce buffer */
 
@@ -596,16 +623,18 @@
 #define SDHCI_TUNING_MODE_2	1
 #define SDHCI_TUNING_MODE_3	2
 
-	unsigned int		cpu_dma_latency_us;
-	struct pm_qos_request	pm_qos_req_dma;
 	ktime_t data_start_time;
 
-	unsigned int pm_qos_timeout_us;         /* timeout for PM QoS request */
-	struct device_attribute pm_qos_tout;
-
 	enum sdhci_power_policy power_policy;
 
+	bool sdio_irq_async_status;
+
 	u32 auto_cmd_err_sts;
+	struct ratelimit_state dbg_dump_rs;
+	struct cmdq_host *cq_host;
+	int reset_wa_applied; /* reset workaround status */
+	ktime_t reset_wa_t; /* time when the reset workaround is applied */
+	int reset_wa_cnt; /* total number of times workaround is used */
 
 	unsigned long private[0] ____cacheline_aligned;
 };
@@ -646,6 +675,7 @@
 #define REQ_IO_LOW	(1 << 2)
 #define REQ_IO_HIGH	(1 << 3)
 	void    (*card_event)(struct sdhci_host *host);
+	int	(*enhanced_strobe)(struct sdhci_host *host);
 	void	(*platform_bus_voting)(struct sdhci_host *host, u32 enable);
 	void	(*toggle_cdr)(struct sdhci_host *host, bool enable);
 	void	(*check_power_status)(struct sdhci_host *host, u32 req_type);
@@ -653,12 +683,20 @@
 					  bool enable,
 					  u32 type);
 	int	(*enable_controller_clock)(struct sdhci_host *host);
+	void	(*clear_set_dumpregs)(struct sdhci_host *host, bool set);
+	void	(*enhanced_strobe_mask)(struct sdhci_host *host, bool set);
 	void	(*dump_vendor_regs)(struct sdhci_host *host);
 	void	(*voltage_switch)(struct sdhci_host *host);
 	int	(*select_drive_strength)(struct sdhci_host *host,
 					 struct mmc_card *card,
 					 unsigned int max_dtr, int host_drv,
 					 int card_drv, int *drv_type);
+	int	(*notify_load)(struct sdhci_host *host, enum mmc_load state);
+	void	(*reset_workaround)(struct sdhci_host *host, u32 enable);
+	void	(*init)(struct sdhci_host *host);
+	void	(*pre_req)(struct sdhci_host *host, struct mmc_request *req);
+	void	(*post_req)(struct sdhci_host *host, struct mmc_request *req);
+	unsigned int	(*get_current_limit)(struct sdhci_host *host);
 };
 
 #ifdef CONFIG_MMC_SDHCI_IO_ACCESSORS
@@ -793,4 +831,5 @@
 extern int sdhci_runtime_resume_host(struct sdhci_host *host);
 #endif
 
+void sdhci_cfg_irq(struct sdhci_host *host, bool enable, bool sync);
 #endif /* __SDHCI_HW_H */
diff --git a/drivers/platform/msm/ipa/ipa_v3/ipa_utils.c b/drivers/platform/msm/ipa/ipa_v3/ipa_utils.c
index bc9f693..82c71353b 100644
--- a/drivers/platform/msm/ipa/ipa_v3/ipa_utils.c
+++ b/drivers/platform/msm/ipa/ipa_v3/ipa_utils.c
@@ -1149,18 +1149,18 @@
 
 static struct msm_bus_paths ipa_usecases_v3_0[]  = {
 	{
-		ARRAY_SIZE(ipa_init_vectors_v3_0),
-		ipa_init_vectors_v3_0,
+		.num_paths = ARRAY_SIZE(ipa_init_vectors_v3_0),
+		.vectors = ipa_init_vectors_v3_0,
 	},
 	{
-		ARRAY_SIZE(ipa_nominal_perf_vectors_v3_0),
-		ipa_nominal_perf_vectors_v3_0,
+		.num_paths = ARRAY_SIZE(ipa_nominal_perf_vectors_v3_0),
+		.vectors = ipa_nominal_perf_vectors_v3_0,
 	},
 };
 
 static struct msm_bus_scale_pdata ipa_bus_client_pdata_v3_0 = {
-	ipa_usecases_v3_0,
-	ARRAY_SIZE(ipa_usecases_v3_0),
+	.usecase = ipa_usecases_v3_0,
+	.num_usecases = ARRAY_SIZE(ipa_usecases_v3_0),
 	.name = "ipa",
 };
 
diff --git a/drivers/power/supply/qcom/fg-core.h b/drivers/power/supply/qcom/fg-core.h
index 48fe04f..ca551a5a 100644
--- a/drivers/power/supply/qcom/fg-core.h
+++ b/drivers/power/supply/qcom/fg-core.h
@@ -165,12 +165,14 @@
 	FG_SRAM_ESR_PULSE_THRESH,
 	FG_SRAM_SYS_TERM_CURR,
 	FG_SRAM_CHG_TERM_CURR,
+	FG_SRAM_CHG_TERM_BASE_CURR,
 	FG_SRAM_DELTA_MSOC_THR,
 	FG_SRAM_DELTA_BSOC_THR,
 	FG_SRAM_RECHARGE_SOC_THR,
 	FG_SRAM_RECHARGE_VBATT_THR,
 	FG_SRAM_KI_COEFF_MED_DISCHG,
 	FG_SRAM_KI_COEFF_HI_DISCHG,
+	FG_SRAM_KI_COEFF_FULL_SOC,
 	FG_SRAM_ESR_TIGHT_FILTER,
 	FG_SRAM_ESR_BROAD_FILTER,
 	FG_SRAM_SLOPE_LIMIT,
@@ -229,6 +231,7 @@
 	int	empty_volt_mv;
 	int	vbatt_low_thr_mv;
 	int	chg_term_curr_ma;
+	int	chg_term_base_curr_ma;
 	int	sys_term_curr_ma;
 	int	delta_soc_thr;
 	int	recharge_soc_thr;
@@ -356,6 +359,7 @@
 	u32			rradc_base;
 	u32			wa_flags;
 	int			batt_id_ohms;
+	int			ki_coeff_full_soc;
 	int			charge_status;
 	int			prev_charge_status;
 	int			charge_done;
diff --git a/drivers/power/supply/qcom/qpnp-fg-gen3.c b/drivers/power/supply/qcom/qpnp-fg-gen3.c
index 7ab5b31..b42a65d 100644
--- a/drivers/power/supply/qcom/qpnp-fg-gen3.c
+++ b/drivers/power/supply/qcom/qpnp-fg-gen3.c
@@ -52,6 +52,8 @@
 #define KI_COEFF_HI_DISCHG_OFFSET	0
 #define KI_COEFF_LOW_DISCHG_WORD	10
 #define KI_COEFF_LOW_DISCHG_OFFSET	2
+#define KI_COEFF_FULL_SOC_WORD		12
+#define KI_COEFF_FULL_SOC_OFFSET	2
 #define DELTA_MSOC_THR_WORD		12
 #define DELTA_MSOC_THR_OFFSET		3
 #define DELTA_BSOC_THR_WORD		13
@@ -128,6 +130,7 @@
 #define RECHARGE_SOC_THR_v2_WORD	14
 #define RECHARGE_SOC_THR_v2_OFFSET	1
 #define CHG_TERM_CURR_v2_WORD		15
+#define CHG_TERM_BASE_CURR_v2_OFFSET	0
 #define CHG_TERM_CURR_v2_OFFSET		1
 #define EMPTY_VOLT_v2_WORD		15
 #define EMPTY_VOLT_v2_OFFSET		3
@@ -226,6 +229,9 @@
 	PARAM(KI_COEFF_HI_DISCHG, KI_COEFF_HI_DISCHG_WORD,
 		KI_COEFF_HI_DISCHG_OFFSET, 1, 1000, 244141, 0,
 		fg_encode_default, NULL),
+	PARAM(KI_COEFF_FULL_SOC, KI_COEFF_FULL_SOC_WORD,
+		KI_COEFF_FULL_SOC_OFFSET, 1, 1000, 244141, 0,
+		fg_encode_default, NULL),
 	PARAM(ESR_TIGHT_FILTER, ESR_FILTER_WORD, ESR_UPD_TIGHT_OFFSET,
 		1, 512, 1000000, 0, fg_encode_default, NULL),
 	PARAM(ESR_BROAD_FILTER, ESR_FILTER_WORD, ESR_UPD_BROAD_OFFSET,
@@ -270,6 +276,9 @@
 		1000000, 122070, 0, fg_encode_current, NULL),
 	PARAM(CHG_TERM_CURR, CHG_TERM_CURR_v2_WORD, CHG_TERM_CURR_v2_OFFSET, 1,
 		100000, 390625, 0, fg_encode_current, NULL),
+	PARAM(CHG_TERM_BASE_CURR, CHG_TERM_CURR_v2_WORD,
+		CHG_TERM_BASE_CURR_v2_OFFSET, 1, 1024, 1000, 0,
+		fg_encode_current, NULL),
 	PARAM(DELTA_MSOC_THR, DELTA_MSOC_THR_v2_WORD, DELTA_MSOC_THR_v2_OFFSET,
 		1, 2048, 100, 0, fg_encode_default, NULL),
 	PARAM(DELTA_BSOC_THR, DELTA_BSOC_THR_v2_WORD, DELTA_BSOC_THR_v2_OFFSET,
@@ -298,6 +307,9 @@
 	PARAM(KI_COEFF_HI_DISCHG, KI_COEFF_HI_DISCHG_v2_WORD,
 		KI_COEFF_HI_DISCHG_v2_OFFSET, 1, 1000, 244141, 0,
 		fg_encode_default, NULL),
+	PARAM(KI_COEFF_FULL_SOC, KI_COEFF_FULL_SOC_WORD,
+		KI_COEFF_FULL_SOC_OFFSET, 1, 1000, 244141, 0,
+		fg_encode_default, NULL),
 	PARAM(ESR_TIGHT_FILTER, ESR_FILTER_WORD, ESR_UPD_TIGHT_OFFSET,
 		1, 512, 1000000, 0, fg_encode_default, NULL),
 	PARAM(ESR_BROAD_FILTER, ESR_FILTER_WORD, ESR_UPD_BROAD_OFFSET,
@@ -1482,6 +1494,37 @@
 	return 0;
 }
 
+#define KI_COEFF_FULL_SOC_DEFAULT	733
+static int fg_adjust_ki_coeff_full_soc(struct fg_chip *chip, int batt_temp)
+{
+	int rc, ki_coeff_full_soc;
+	u8 val;
+
+	if (batt_temp < 0)
+		ki_coeff_full_soc = 0;
+	else
+		ki_coeff_full_soc = KI_COEFF_FULL_SOC_DEFAULT;
+
+	if (chip->ki_coeff_full_soc == ki_coeff_full_soc)
+		return 0;
+
+	fg_encode(chip->sp, FG_SRAM_KI_COEFF_FULL_SOC, ki_coeff_full_soc, &val);
+	rc = fg_sram_write(chip,
+			chip->sp[FG_SRAM_KI_COEFF_FULL_SOC].addr_word,
+			chip->sp[FG_SRAM_KI_COEFF_FULL_SOC].addr_byte, &val,
+			chip->sp[FG_SRAM_KI_COEFF_FULL_SOC].len,
+			FG_IMA_DEFAULT);
+	if (rc < 0) {
+		pr_err("Error in writing ki_coeff_full_soc, rc=%d\n", rc);
+		return rc;
+	}
+
+	chip->ki_coeff_full_soc = ki_coeff_full_soc;
+	fg_dbg(chip, FG_STATUS, "Wrote ki_coeff_full_soc %d\n",
+		ki_coeff_full_soc);
+	return 0;
+}
+
 static int fg_set_recharge_voltage(struct fg_chip *chip, int voltage_mv)
 {
 	u8 buf;
@@ -2066,6 +2109,11 @@
 		if (rc < 0)
 			pr_err("Error in configuring slope limiter rc:%d\n",
 				rc);
+
+		rc = fg_adjust_ki_coeff_full_soc(chip, batt_temp);
+		if (rc < 0)
+			pr_err("Error in configuring ki_coeff_full_soc rc:%d\n",
+				rc);
 	}
 
 	fg_batt_avg_update(chip);
@@ -3101,6 +3149,21 @@
 		return rc;
 	}
 
+	if (!(chip->wa_flags & PMI8998_V1_REV_WA)) {
+		fg_encode(chip->sp, FG_SRAM_CHG_TERM_BASE_CURR,
+			chip->dt.chg_term_base_curr_ma, buf);
+		rc = fg_sram_write(chip,
+				chip->sp[FG_SRAM_CHG_TERM_BASE_CURR].addr_word,
+				chip->sp[FG_SRAM_CHG_TERM_BASE_CURR].addr_byte,
+				buf, chip->sp[FG_SRAM_CHG_TERM_BASE_CURR].len,
+				FG_IMA_DEFAULT);
+		if (rc < 0) {
+			pr_err("Error in writing chg_term_base_curr, rc=%d\n",
+				rc);
+			return rc;
+		}
+	}
+
 	if (chip->dt.vbatt_low_thr_mv > 0) {
 		fg_encode(chip->sp, FG_SRAM_VBATT_LOW,
 			chip->dt.vbatt_low_thr_mv, buf);
@@ -3409,6 +3472,10 @@
 	if (rc < 0)
 		pr_err("Error in configuring slope limiter rc:%d\n", rc);
 
+	rc = fg_adjust_ki_coeff_full_soc(chip, batt_temp);
+	if (rc < 0)
+		pr_err("Error in configuring ki_coeff_full_soc rc:%d\n", rc);
+
 	if (!batt_psy_initialized(chip)) {
 		chip->last_batt_temp = batt_temp;
 		return IRQ_HANDLED;
@@ -3755,6 +3822,7 @@
 #define DEFAULT_EMPTY_VOLT_MV		2800
 #define DEFAULT_RECHARGE_VOLT_MV	4250
 #define DEFAULT_CHG_TERM_CURR_MA	100
+#define DEFAULT_CHG_TERM_BASE_CURR_MA	75
 #define DEFAULT_SYS_TERM_CURR_MA	-125
 #define DEFAULT_DELTA_SOC_THR		1
 #define DEFAULT_RECHARGE_SOC_THR	95
@@ -3909,6 +3977,12 @@
 	else
 		chip->dt.sys_term_curr_ma = temp;
 
+	rc = of_property_read_u32(node, "qcom,fg-chg-term-base-current", &temp);
+	if (rc < 0)
+		chip->dt.chg_term_base_curr_ma = DEFAULT_CHG_TERM_BASE_CURR_MA;
+	else
+		chip->dt.chg_term_base_curr_ma = temp;
+
 	rc = of_property_read_u32(node, "qcom,fg-delta-soc-thr", &temp);
 	if (rc < 0)
 		chip->dt.delta_soc_thr = DEFAULT_DELTA_SOC_THR;
@@ -4147,6 +4221,7 @@
 	chip->irqs = fg_irqs;
 	chip->charge_status = -EINVAL;
 	chip->prev_charge_status = -EINVAL;
+	chip->ki_coeff_full_soc = -EINVAL;
 	chip->regmap = dev_get_regmap(chip->dev->parent, NULL);
 	if (!chip->regmap) {
 		dev_err(chip->dev, "Parent regmap is unavailable\n");
diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 7b91717..14358cb 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -1404,7 +1404,7 @@
 	 * state to CLKS_ON.
 	 */
 	if (hba->clk_gating.is_suspended ||
-		(hba->clk_gating.state == REQ_CLKS_ON)) {
+		(hba->clk_gating.state != REQ_CLKS_OFF)) {
 		hba->clk_gating.state = CLKS_ON;
 		trace_ufshcd_clk_gating(dev_name(hba->dev),
 			hba->clk_gating.state);
diff --git a/drivers/soc/qcom/glink.c b/drivers/soc/qcom/glink.c
index 8cd5d3c..fd4c604 100644
--- a/drivers/soc/qcom/glink.c
+++ b/drivers/soc/qcom/glink.c
@@ -1846,7 +1846,7 @@
 
 /**
  * ch_name_to_ch_ctx_create() - lookup a channel by name, create the channel if
- *                              it is not found.
+ *                              it is not found and get reference of context.
  * @xprt_ctx:	Transport to search for a matching channel.
  * @name:	Name of the desired channel.
  *
@@ -1902,6 +1902,7 @@
 			spin_unlock_irqrestore(&xprt_ctx->xprt_ctx_lock_lhb1,
 					flags);
 			kfree(ctx);
+			rwref_get(&entry->ch_state_lhb2);
 			rwref_write_put(&xprt_ctx->xprt_state_lhb0);
 			return entry;
 		}
@@ -1935,6 +1936,7 @@
 			"%s: local:GLINK_CHANNEL_CLOSED\n",
 			__func__);
 	}
+	rwref_get(&ctx->ch_state_lhb2);
 	spin_unlock_irqrestore(&xprt_ctx->xprt_ctx_lock_lhb1, flags);
 	rwref_write_put(&xprt_ctx->xprt_state_lhb0);
 	mutex_lock(&xprt_ctx->xprt_dbgfs_lock_lhb4);
@@ -2579,6 +2581,7 @@
 		GLINK_INFO_CH_XPRT(ctx, transport_ptr,
 		"%s: Channel not ready to be re-opened. State: %u\n",
 		__func__, ctx->local_open_state);
+		rwref_put(&ctx->ch_state_lhb2);
 		return ERR_PTR(-EBUSY);
 	}
 
@@ -2627,11 +2630,13 @@
 		ctx->local_open_state = GLINK_CHANNEL_CLOSED;
 		GLINK_ERR_CH(ctx, "%s: Unable to send open command %d\n",
 			__func__, ret);
+		rwref_put(&ctx->ch_state_lhb2);
 		return ERR_PTR(ret);
 	}
 
 	GLINK_INFO_CH(ctx, "%s: Created channel, sent OPEN command. ctx %p\n",
 			__func__, ctx);
+	rwref_put(&ctx->ch_state_lhb2);
 	return ctx;
 }
 EXPORT_SYMBOL(glink_open);
@@ -4805,6 +4810,7 @@
 		GLINK_ERR_CH(ctx,
 		       "%s: Duplicate remote open for rcid %u, name '%s'\n",
 		       __func__, rcid, name);
+		rwref_put(&ctx->ch_state_lhb2);
 		glink_core_migration_edge_unlock(if_ptr->glink_core_priv);
 		return;
 	}
@@ -4827,6 +4833,7 @@
 
 	if (do_migrate)
 		ch_migrate(NULL, ctx);
+	rwref_put(&ctx->ch_state_lhb2);
 	glink_core_migration_edge_unlock(if_ptr->glink_core_priv);
 }
 
diff --git a/drivers/soc/qcom/glink_smem_native_xprt.c b/drivers/soc/qcom/glink_smem_native_xprt.c
index 266c0a2..3c4759c 100644
--- a/drivers/soc/qcom/glink_smem_native_xprt.c
+++ b/drivers/soc/qcom/glink_smem_native_xprt.c
@@ -822,6 +822,12 @@
 							einfo->remote_proc_id,
 							SMEM_ITEM_CACHED_FLAG);
 		if (!einfo->rx_fifo)
+			einfo->rx_fifo = smem_get_entry(
+						SMEM_GLINK_NATIVE_XPRT_FIFO_1,
+							&einfo->rx_fifo_size,
+							einfo->remote_proc_id,
+							0);
+		if (!einfo->rx_fifo)
 			return false;
 	}
 
diff --git a/drivers/soc/qcom/msm_glink_pkt.c b/drivers/soc/qcom/msm_glink_pkt.c
index 2471d27..519bb611 100644
--- a/drivers/soc/qcom/msm_glink_pkt.c
+++ b/drivers/soc/qcom/msm_glink_pkt.c
@@ -625,14 +625,17 @@
 		return -ENETRESET;
 	}
 
+	mutex_lock(&devp->ch_lock);
 	if (!glink_rx_intent_exists(devp->handle, count)) {
 		ret  = glink_queue_rx_intent(devp->handle, devp, count);
 		if (ret) {
 			GLINK_PKT_ERR("%s: failed to queue_rx_intent ret[%d]\n",
 					__func__, ret);
+			mutex_unlock(&devp->ch_lock);
 			return ret;
 		}
 	}
+	mutex_unlock(&devp->ch_lock);
 
 	GLINK_PKT_INFO("Begin %s on glink_pkt_dev id:%d buffer_size %zu\n",
 		__func__, devp->i, count);
diff --git a/drivers/spi/spi-geni-qcom.c b/drivers/spi/spi-geni-qcom.c
index 403c799..db12900 100644
--- a/drivers/spi/spi-geni-qcom.c
+++ b/drivers/spi/spi-geni-qcom.c
@@ -16,6 +16,7 @@
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/of.h>
+#include <linux/pm_runtime.h>
 #include <linux/qcom-geni-se.h>
 #include <linux/spi/spi.h>
 
diff --git a/drivers/tty/serial/msm_geni_serial.c b/drivers/tty/serial/msm_geni_serial.c
index 5b48323..87b26445 100644
--- a/drivers/tty/serial/msm_geni_serial.c
+++ b/drivers/tty/serial/msm_geni_serial.c
@@ -21,6 +21,7 @@
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 #include <linux/qcom-geni-se.h>
 #include <linux/serial.h>
 #include <linux/serial_core.h>
diff --git a/drivers/usb/phy/phy-msm-ssusb-qmp.c b/drivers/usb/phy/phy-msm-ssusb-qmp.c
index 9ef34c3..8bdd9fd 100644
--- a/drivers/usb/phy/phy-msm-ssusb-qmp.c
+++ b/drivers/usb/phy/phy-msm-ssusb-qmp.c
@@ -126,6 +126,7 @@
 	struct clk		*pipe_clk;
 	struct reset_control	*phy_reset;
 	struct reset_control	*phy_phy_reset;
+	struct reset_control	*global_phy_reset;
 	bool			power_enabled;
 	bool			clk_enabled;
 	bool			cable_connected;
@@ -136,8 +137,6 @@
 	int			init_seq_len;
 	unsigned int		*qmp_phy_reg_offset;
 	int			reg_offset_cnt;
-
-	int			port_select;
 };
 
 static const struct of_device_id msm_usb_id_table[] = {
@@ -157,6 +156,7 @@
 };
 MODULE_DEVICE_TABLE(of, msm_usb_id_table);
 
+static void usb_qmp_powerup_phy(struct msm_ssphy_qmp *phy);
 static void msm_ssphy_qmp_enable_clks(struct msm_ssphy_qmp *phy, bool on);
 
 static inline char *get_cable_status_str(struct msm_ssphy_qmp *phy)
@@ -334,15 +334,13 @@
 
 	/* perform lane selection */
 	val = -EINVAL;
-	if (phy->phy.flags & PHY_LANE_A) {
+	if (phy->phy.flags & PHY_LANE_A)
 		val = SW_PORTSELECT_MX;
-		phy->port_select = PHY_LANE_A;
-	}
-
-	if (phy->phy.flags & PHY_LANE_B) {
+	else if (phy->phy.flags & PHY_LANE_B)
 		val = SW_PORTSELECT | SW_PORTSELECT_MX;
-		phy->port_select = PHY_LANE_B;
-	}
+
+	/* PHY must be powered up before updating portselect and phymode. */
+	usb_qmp_powerup_phy(phy);
 
 	switch (phy->phy.type) {
 	case USB_PHY_TYPE_USB3_DP:
@@ -362,10 +360,6 @@
 		writel_relaxed(USB3_MODE | DP_MODE,
 			phy->base + phy->phy_reg[USB3_DP_COM_PHY_MODE_CTRL]);
 
-		/* activate register access of LANE for both USB3 and DP */
-		writel_relaxed(USB3_SWI_ACT_ACCESS_EN | DP_SWI_ACT_ACCESS_EN,
-			phy->base + phy->phy_reg[USB3_DP_COM_SWI_CTRL]);
-
 		/* bring both QMP USB and QMP DP PHYs PCS block out of reset */
 		writel_relaxed(0x00,
 			phy->base + phy->phy_reg[USB3_DP_COM_RESET_OVRD_CTRL]);
@@ -396,9 +390,10 @@
 			phy->base + phy->phy_reg[USB3_DP_COM_POWER_DOWN_CTRL]);
 
 		/*
-		 * Don't write 0x0 to DP_COM_SW_RESET as next operation is to
-		 * update phymode and port select which needs DP_COM_SW_RESET
-		 * as 0x1.
+		 * Don't write 0x0 to DP_COM_SW_RESET here as portselect and
+		 * phymode operation needs DP_COM_SW_RESET as 0x1.
+		 * msm_ssphy_qmp_init() writes 0x0 to DP_COM_SW_RESET before
+		 * initializing PHY.
 		 */
 
 		/* intentional fall-through */
@@ -440,12 +435,12 @@
 
 	msm_ssphy_qmp_enable_clks(phy, true);
 
-	/* power up PHY */
-	usb_qmp_powerup_phy(phy);
-
 	/* select appropriate port select and PHY mode if applicable */
 	usb_qmp_update_portselect_phymode(phy);
 
+	/* power up PHY */
+	usb_qmp_powerup_phy(phy);
+
 	reg = (struct qmp_reg_val *)phy->qmp_phy_init_seq;
 
 	/* Main configuration */
@@ -494,28 +489,31 @@
 					phy);
 	int ret = 0;
 
-	/*
-	 * Avoid global reset if there is no change in port select which
-	 * shall be useful while changing PHY mode i.e. transition from
-	 * 4 LANE/2 LANE.
-	 */
-	if ((((phy->phy.flags & PHY_LANE_A) == phy->port_select)) ||
-			((phy->phy.flags & PHY_LANE_B) == phy->port_select))
-		goto exit;
-
 	dev_dbg(uphy->dev, "Global reset of QMP DP combo phy\n");
-	/* Assert QMP USB DP combo PHY reset */
+	/* Assert global PHY reset */
+	ret = reset_control_assert(phy->global_phy_reset);
+	if (ret) {
+		dev_err(uphy->dev, "global_phy_reset assert failed\n");
+		goto exit;
+	}
+
+	/* Assert QMP USB PHY reset */
 	ret = reset_control_assert(phy->phy_reset);
 	if (ret) {
 		dev_err(uphy->dev, "phy_reset assert failed\n");
 		goto exit;
 	}
 
-	/* De-Assert QMP USB DP combo PHY reset */
+	/* De-Assert QMP USB PHY reset */
 	ret = reset_control_deassert(phy->phy_reset);
 	if (ret)
 		dev_err(uphy->dev, "phy_reset deassert failed\n");
 
+	/* De-Assert global PHY reset */
+	ret = reset_control_deassert(phy->global_phy_reset);
+	if (ret)
+		dev_err(uphy->dev, "global_phy_reset deassert failed\n");
+
 exit:
 	return ret;
 }
@@ -809,7 +807,15 @@
 		goto err;
 	}
 
-	if (phy->phy.type == USB_PHY_TYPE_USB3) {
+	if (phy->phy.type == USB_PHY_TYPE_USB3_DP) {
+		phy->global_phy_reset = devm_reset_control_get(dev,
+						"global_phy_reset");
+		if (IS_ERR(phy->global_phy_reset)) {
+			ret = PTR_ERR(phy->global_phy_reset);
+			dev_dbg(dev, "failed to get global_phy_reset\n");
+			goto err;
+		}
+	} else {
 		phy->phy_phy_reset = devm_reset_control_get(dev,
 						"phy_phy_reset");
 		if (IS_ERR(phy->phy_phy_reset)) {
diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h
index 4fef190..f5678aa 100644
--- a/include/drm/drm_mipi_dsi.h
+++ b/include/drm/drm_mipi_dsi.h
@@ -21,12 +21,15 @@
 #define MIPI_DSI_MSG_REQ_ACK	BIT(0)
 /* use Low Power Mode to transmit message */
 #define MIPI_DSI_MSG_USE_LPM	BIT(1)
+/* read mipi_dsi_msg.ctrl and unicast to only that ctrls */
+#define MIPI_DSI_MSG_UNICAST	BIT(2)
 
 /**
  * struct mipi_dsi_msg - read/write DSI buffer
  * @channel: virtual channel id
  * @type: payload data type
  * @flags: flags controlling this message transmission
+ * @ctrl: ctrl index to transmit on
  * @tx_len: length of @tx_buf
  * @tx_buf: data to be written
  * @rx_len: length of @rx_buf
@@ -36,6 +39,7 @@
 	u8 channel;
 	u8 type;
 	u16 flags;
+	u32 ctrl;
 
 	size_t tx_len;
 	const void *tx_buf;
diff --git a/include/dt-bindings/clock/qcom,rpmh.h b/include/dt-bindings/clock/qcom,rpmh.h
index 778d11b..a31fa20 100644
--- a/include/dt-bindings/clock/qcom,rpmh.h
+++ b/include/dt-bindings/clock/qcom,rpmh.h
@@ -17,19 +17,17 @@
 /* RPMh controlled clocks */
 #define RPMH_CXO_CLK						0
 #define RPMH_CXO_CLK_A						1
-#define RPMH_LN_BB_CLK1						2
-#define RPMH_LN_BB_CLK1_A					3
-#define RPMH_LN_BB_CLK2						4
-#define RPMH_LN_BB_CLK2_A					5
-#define RPMH_LN_BB_CLK3						6
-#define RPMH_LN_BB_CLK3_A					7
-#define RPMH_RF_CLK1						8
-#define RPMH_RF_CLK1_A						9
-#define RPMH_RF_CLK2						10
-#define RPMH_RF_CLK2_A						11
-#define RPMH_RF_CLK3						12
-#define RPMH_RF_CLK3_A						13
-#define RPMH_QDSS_CLK						14
-#define RPMH_QDSS_A_CLK						15
+#define RPMH_LN_BB_CLK2						2
+#define RPMH_LN_BB_CLK2_A					3
+#define RPMH_LN_BB_CLK3						4
+#define RPMH_LN_BB_CLK3_A					5
+#define RPMH_RF_CLK1						6
+#define RPMH_RF_CLK1_A						7
+#define RPMH_RF_CLK2						8
+#define RPMH_RF_CLK2_A						9
+#define RPMH_RF_CLK3						10
+#define RPMH_RF_CLK3_A						11
+#define RPMH_QDSS_CLK						12
+#define RPMH_QDSS_A_CLK						13
 
 #endif
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index fc08c407..bda14ef 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -162,6 +162,7 @@
 	__REQ_INTEGRITY,	/* I/O includes block integrity payload */
 	__REQ_FUA,		/* forced unit access */
 	__REQ_PREFLUSH,		/* request for cache flush */
+	__REQ_BARRIER,		/* marks flush req as barrier */
 
 	/* bio only flags */
 	__REQ_RAHEAD,		/* read ahead, can fail anytime */
@@ -207,7 +208,7 @@
 	(REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER)
 #define REQ_COMMON_MASK \
 	(REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | REQ_NOIDLE | \
-	 REQ_PREFLUSH | REQ_FUA | REQ_INTEGRITY | REQ_NOMERGE)
+	 REQ_PREFLUSH | REQ_FUA | REQ_INTEGRITY | REQ_NOMERGE | REQ_BARRIER)
 #define REQ_CLONE_MASK		REQ_COMMON_MASK
 
 /* This mask is used for both bio and request merge checking */
@@ -220,6 +221,7 @@
 #define REQ_SORTED		(1ULL << __REQ_SORTED)
 #define REQ_SOFTBARRIER		(1ULL << __REQ_SOFTBARRIER)
 #define REQ_FUA			(1ULL << __REQ_FUA)
+#define REQ_BARRIER		(1ULL << __REQ_BARRIER)
 #define REQ_NOMERGE		(1ULL << __REQ_NOMERGE)
 #define REQ_STARTED		(1ULL << __REQ_STARTED)
 #define REQ_DONTPREP		(1ULL << __REQ_DONTPREP)
diff --git a/include/linux/coresight-pmu.h b/include/linux/coresight-pmu.h
index 7d41026..7d8b528 100644
--- a/include/linux/coresight-pmu.h
+++ b/include/linux/coresight-pmu.h
@@ -19,7 +19,7 @@
 #define _LINUX_CORESIGHT_PMU_H
 
 #define CORESIGHT_ETM_PMU_NAME "cs_etm"
-#define CORESIGHT_ETM_PMU_SEED  0x10
+#define CORESIGHT_ETM_PMU_SEED  0x01
 
 /* ETMv3.5/PTM's ETMCR config bit */
 #define ETM_OPT_CYCACC  12
@@ -33,7 +33,7 @@
 	 * the common convention is to have data trace IDs be I(N) + 1,
 	 * set instruction trace IDs as a function of the CPU number.
 	 */
-	return (CORESIGHT_ETM_PMU_SEED + (cpu * 2));
+	return (CORESIGHT_ETM_PMU_SEED + cpu);
 }
 
 #endif
diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h
index d265f60..d0ec667 100644
--- a/include/linux/mmc/card.h
+++ b/include/linux/mmc/card.h
@@ -12,7 +12,11 @@
 
 #include <linux/device.h>
 #include <linux/mmc/core.h>
+#include <linux/mmc/mmc.h>
 #include <linux/mod_devicetable.h>
+#include <linux/notifier.h>
+
+#define MMC_CARD_CMDQ_BLK_SIZE 512
 
 struct mmc_cid {
 	unsigned int		manfid;
@@ -52,6 +56,7 @@
 	u8			sec_feature_support;
 	u8			rel_sectors;
 	u8			rel_param;
+	bool			enhanced_rpmb_supported;
 	u8			part_config;
 	u8			cache_ctrl;
 	u8			rst_n_function;
@@ -83,11 +88,13 @@
 	bool			hpi;			/* HPI support bit */
 	unsigned int		hpi_cmd;		/* cmd used as HPI */
 	bool			bkops;		/* background support bit */
-	bool			man_bkops_en;	/* manual bkops enable bit */
+	u8			bkops_en;	/* bkops enable */
 	unsigned int            data_sector_size;       /* 512 bytes or 4KB */
 	unsigned int            data_tag_unit_size;     /* DATA TAG UNIT size */
 	unsigned int		boot_ro_lock;		/* ro lock support */
 	bool			boot_ro_lockable;
+	u8			raw_ext_csd_cmdq;	/* 15 */
+	u8			raw_ext_csd_cache_ctrl;	/* 33 */
 	bool			ffu_capable;	/* Firmware upgrade support */
 #define MMC_FIRMWARE_LEN 8
 	u8			fwrev[MMC_FIRMWARE_LEN];  /* FW version */
@@ -95,7 +102,10 @@
 	u8			raw_partition_support;	/* 160 */
 	u8			raw_rpmb_size_mult;	/* 168 */
 	u8			raw_erased_mem_count;	/* 181 */
+	u8			raw_ext_csd_bus_width;	/* 183 */
 	u8			strobe_support;		/* 184 */
+#define MMC_STROBE_SUPPORT	(1 << 0)
+	u8			raw_ext_csd_hs_timing;	/* 185 */
 	u8			raw_ext_csd_structure;	/* 194 */
 	u8			raw_card_type;		/* 196 */
 	u8			raw_driver_strength;	/* 197 */
@@ -116,13 +126,20 @@
 	u8			raw_pwr_cl_200_360;	/* 237 */
 	u8			raw_pwr_cl_ddr_52_195;	/* 238 */
 	u8			raw_pwr_cl_ddr_52_360;	/* 239 */
+	u8			cache_flush_policy;	/* 240 */
+#define MMC_BKOPS_URGENCY_MASK 0x3
 	u8			raw_pwr_cl_ddr_200_360;	/* 253 */
 	u8			raw_bkops_status;	/* 246 */
 	u8			raw_sectors[4];		/* 212 - 4 bytes */
 	u8			pre_eol_info;		/* 267 */
 	u8			device_life_time_est_typ_a;	/* 268 */
 	u8			device_life_time_est_typ_b;	/* 269 */
+	u8			cmdq_depth;		/* 307 */
+	u8			cmdq_support;		/* 308 */
+	u8			barrier_support;	/* 486 */
+	u8			barrier_en;
 
+	u8			fw_version;		/* 254 */
 	unsigned int            feature_support;
 #define MMC_DISCARD_FEATURE	BIT(0)                  /* CMD38 feature */
 };
@@ -193,7 +210,8 @@
 				wide_bus:1,
 				high_power:1,
 				high_speed:1,
-				disable_cd:1;
+				disable_cd:1,
+				async_intr_sup:1;
 };
 
 struct sdio_cis {
@@ -222,6 +240,28 @@
 	MMC_BLK_NEW_REQUEST,
 };
 
+enum mmc_packed_stop_reasons {
+	EXCEEDS_SEGMENTS = 0,
+	EXCEEDS_SECTORS,
+	WRONG_DATA_DIR,
+	FLUSH_OR_DISCARD,
+	EMPTY_QUEUE,
+	REL_WRITE,
+	THRESHOLD,
+	LARGE_SEC_ALIGN,
+	RANDOM,
+	FUA,
+	MAX_REASONS,
+};
+
+struct mmc_wr_pack_stats {
+	u32 *packing_events;
+	u32 pack_stop_reason[MAX_REASONS];
+	spinlock_t lock;
+	bool enabled;
+	bool print_in_read;
+};
+
 /* The number of MMC physical partitions.  These consist of:
  * boot partitions (2), general purpose partitions (4) and
  * RPMB partition (1) in MMC v4.4.
@@ -246,6 +286,62 @@
 #define MMC_BLK_DATA_AREA_RPMB	(1<<3)
 };
 
+enum {
+	MMC_BKOPS_NO_OP,
+	MMC_BKOPS_NOT_CRITICAL,
+	MMC_BKOPS_PERF_IMPACT,
+	MMC_BKOPS_CRITICAL,
+	MMC_BKOPS_NUM_SEVERITY_LEVELS,
+};
+
+/**
+ * struct mmc_bkops_stats - BKOPS statistics
+ * @lock: spinlock used for synchronizing the debugfs and the runtime accesses
+ *	to this structure. No need to call with spin_lock_irq api
+ * @manual_start: number of times START_BKOPS was sent to the device
+ * @hpi: number of times HPI was sent to the device
+ * @auto_start: number of times AUTO_EN was set to 1
+ * @auto_stop: number of times AUTO_EN was set to 0
+ * @level: number of times the device reported the need for each level of
+ *	bkops handling
+ * @enabled: control over whether statistics should be gathered
+ *
+ * This structure is used to collect statistics regarding the bkops
+ * configuration and use-patterns. It is collected during runtime and can be
+ * shown to the user via a debugfs entry.
+ */
+struct mmc_bkops_stats {
+	spinlock_t	lock;
+	unsigned int	manual_start;
+	unsigned int	hpi;
+	unsigned int	auto_start;
+	unsigned int	auto_stop;
+	unsigned int	level[MMC_BKOPS_NUM_SEVERITY_LEVELS];
+	bool		enabled;
+};
+
+/**
+ * struct mmc_bkops_info - BKOPS data
+ * @stats: statistic information regarding bkops
+ * @needs_check: indication whether need to check with the device
+ *	whether it requires handling of BKOPS (CMD8)
+ * @needs_manual: indication whether have to send START_BKOPS
+ *	to the device
+ */
+struct mmc_bkops_info {
+	struct mmc_bkops_stats stats;
+	bool needs_check;
+	bool needs_bkops;
+	u32  retry_counter;
+};
+
+enum mmc_pon_type {
+	MMC_LONG_PON = 1,
+	MMC_SHRT_PON,
+};
+
+#define MMC_QUIRK_CMDQ_DELAY_BEFORE_DCMD 6 /* microseconds */
+
 /*
  * MMC device
  */
@@ -253,6 +349,10 @@
 	struct mmc_host		*host;		/* the host this device belongs to */
 	struct device		dev;		/* the device */
 	u32			ocr;		/* the current OCR setting */
+	unsigned long		clk_scaling_lowest;	/* lowest scaleable
+							 * frequency */
+	unsigned long		clk_scaling_highest;	/* highest scaleable
+							 * frequency */
 	unsigned int		rca;		/* relative card address of device */
 	unsigned int		type;		/* card type */
 #define MMC_TYPE_MMC		0		/* MMC card */
@@ -265,8 +365,10 @@
 #define MMC_STATE_BLOCKADDR	(1<<2)		/* card uses block-addressing */
 #define MMC_CARD_SDXC		(1<<3)		/* card is SDXC */
 #define MMC_CARD_REMOVED	(1<<4)		/* card has been removed */
-#define MMC_STATE_DOING_BKOPS	(1<<5)		/* card is doing BKOPS */
+#define MMC_STATE_DOING_BKOPS	(1<<5)		/* card is doing manual BKOPS */
 #define MMC_STATE_SUSPENDED	(1<<6)		/* card is suspended */
+#define MMC_STATE_CMDQ		(1<<12)         /* card is in cmd queue mode */
+#define MMC_STATE_AUTO_BKOPS	(1<<13)		/* card is doing auto BKOPS */
 	unsigned int		quirks; 	/* card quirks */
 #define MMC_QUIRK_LENIENT_FN0	(1<<0)		/* allow SDIO FN0 writes outside of the VS CCCR range */
 #define MMC_QUIRK_BLKSZ_FOR_BYTE_MODE (1<<1)	/* use func->cur_blksize */
@@ -285,6 +387,15 @@
 #define MMC_QUIRK_BROKEN_IRQ_POLLING	(1<<11)	/* Polling SDIO_CCCR_INTx could create a fake interrupt */
 #define MMC_QUIRK_TRIM_BROKEN	(1<<12)		/* Skip trim */
 #define MMC_QUIRK_BROKEN_HPI	(1<<13)		/* Disable broken HPI support */
+						/* byte mode */
+#define MMC_QUIRK_INAND_DATA_TIMEOUT  (1<<14)   /* For incorrect data timeout */
+#define MMC_QUIRK_CACHE_DISABLE (1 << 15)	/* prevent cache enable */
+#define MMC_QUIRK_QCA6574_SETTINGS (1 << 16)	/* QCA6574 card settings*/
+#define MMC_QUIRK_QCA9377_SETTINGS (1 << 17)	/* QCA9377 card settings*/
+
+
+/* Make sure CMDQ is empty before queuing DCMD */
+#define MMC_QUIRK_CMDQ_EMPTY_BEFORE_DCMD (1 << 18)
 
 
 	unsigned int		erase_size;	/* erase size in sectors */
@@ -320,6 +431,14 @@
 	struct dentry		*debugfs_root;
 	struct mmc_part	part[MMC_NUM_PHY_PARTITION]; /* physical partitions */
 	unsigned int    nr_parts;
+	unsigned int	part_curr;
+
+	struct mmc_wr_pack_stats wr_pack_stats; /* packed commands stats*/
+	struct notifier_block        reboot_notify;
+	enum mmc_pon_type pon_type;
+	u8 *cached_ext_csd;
+	bool cmdq_init;
+	struct mmc_bkops_info bkops;
 };
 
 /*
@@ -382,6 +501,19 @@
 
 #define END_FIXUP { NULL }
 
+/* extended CSD mapping to mmc version */
+enum mmc_version_ext_csd_rev {
+	MMC_V4_0,
+	MMC_V4_1,
+	MMC_V4_2,
+	MMC_V4_41 = 5,
+	MMC_V4_5,
+	MMC_V4_51 = MMC_V4_5,
+	MMC_V5_0,
+	MMC_V5_01 = MMC_V5_0,
+	MMC_V5_1
+};
+
 #define _FIXUP_EXT(_name, _manfid, _oemid, _rev_start, _rev_end,	\
 		   _cis_vendor, _cis_device,				\
 		   _fixup, _data, _ext_csd_rev)				\
@@ -457,6 +589,8 @@
 #define mmc_card_removed(c)	((c) && ((c)->state & MMC_CARD_REMOVED))
 #define mmc_card_doing_bkops(c)	((c)->state & MMC_STATE_DOING_BKOPS)
 #define mmc_card_suspended(c)	((c)->state & MMC_STATE_SUSPENDED)
+#define mmc_card_cmdq(c)       ((c)->state & MMC_STATE_CMDQ)
+#define mmc_card_doing_auto_bkops(c)	((c)->state & MMC_STATE_AUTO_BKOPS)
 
 #define mmc_card_set_present(c)	((c)->state |= MMC_STATE_PRESENT)
 #define mmc_card_set_readonly(c) ((c)->state |= MMC_STATE_READONLY)
@@ -467,6 +601,12 @@
 #define mmc_card_clr_doing_bkops(c)	((c)->state &= ~MMC_STATE_DOING_BKOPS)
 #define mmc_card_set_suspended(c) ((c)->state |= MMC_STATE_SUSPENDED)
 #define mmc_card_clr_suspended(c) ((c)->state &= ~MMC_STATE_SUSPENDED)
+#define mmc_card_set_cmdq(c)           ((c)->state |= MMC_STATE_CMDQ)
+#define mmc_card_clr_cmdq(c)           ((c)->state &= ~MMC_STATE_CMDQ)
+#define mmc_card_set_auto_bkops(c)	((c)->state |= MMC_STATE_AUTO_BKOPS)
+#define mmc_card_clr_auto_bkops(c)	((c)->state &= ~MMC_STATE_AUTO_BKOPS)
+
+#define mmc_card_strobe(c) (((c)->ext_csd).strobe_support & MMC_STROBE_SUPPORT)
 
 /*
  * Quirk add/remove for MMC products.
@@ -542,10 +682,37 @@
 	return c->quirks & MMC_QUIRK_BROKEN_HPI;
 }
 
+static inline bool mmc_card_support_auto_bkops(const struct mmc_card *c)
+{
+	return c->ext_csd.rev >= MMC_V5_1;
+}
+
+static inline bool mmc_card_configured_manual_bkops(const struct mmc_card *c)
+{
+	return c->ext_csd.bkops_en & EXT_CSD_BKOPS_MANUAL_EN;
+}
+
+static inline bool mmc_card_configured_auto_bkops(const struct mmc_card *c)
+{
+	return c->ext_csd.bkops_en & EXT_CSD_BKOPS_AUTO_EN;
+}
+
+static inline bool mmc_enable_qca6574_settings(const struct mmc_card *c)
+{
+	return c->quirks & MMC_QUIRK_QCA6574_SETTINGS;
+}
+
+static inline bool mmc_enable_qca9377_settings(const struct mmc_card *c)
+{
+	return c->quirks & MMC_QUIRK_QCA9377_SETTINGS;
+}
+
 #define mmc_card_name(c)	((c)->cid.prod_name)
 #define mmc_card_id(c)		(dev_name(&(c)->dev))
 
 #define mmc_dev_to_card(d)	container_of(d, struct mmc_card, dev)
+#define mmc_get_drvdata(c)	dev_get_drvdata(&(c)->dev)
+#define mmc_set_drvdata(c,d)	dev_set_drvdata(&(c)->dev, d)
 
 /*
  * MMC device driver (e.g., Flash card, I/O card...)
@@ -562,5 +729,9 @@
 
 extern void mmc_fixup_device(struct mmc_card *card,
 			     const struct mmc_fixup *table);
-
+extern struct mmc_wr_pack_stats *mmc_blk_get_packed_statistics(
+			struct mmc_card *card);
+extern void mmc_blk_init_packed_statistics(struct mmc_card *card);
+extern int mmc_send_pon(struct mmc_card *card);
+extern void mmc_blk_cmdq_req_done(struct mmc_request *mrq);
 #endif /* LINUX_MMC_CARD_H */
diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h
index 2b58cbd..5d5aff1 100644
--- a/include/linux/mmc/core.h
+++ b/include/linux/mmc/core.h
@@ -71,6 +71,8 @@
 	unsigned int		busy_timeout;	/* busy detect timeout in ms */
 	/* Set this flag only for blocking sanitize request */
 	bool			sanitize_busy;
+	/* Set this flag only for blocking bkops request */
+	bool			bkops_busy;
 
 	struct mmc_data		*data;		/* data segment associated with cmd */
 	struct mmc_request	*mrq;		/* associated request */
@@ -96,6 +98,7 @@
 	int			sg_count;	/* mapped sg entries */
 	struct scatterlist	*sg;		/* I/O scatter list */
 	s32			host_cookie;	/* host private data */
+	bool			fault_injected; /* fault injected */
 };
 
 struct mmc_host;
@@ -109,6 +112,8 @@
 	struct completion	cmd_completion;
 	void			(*done)(struct mmc_request *);/* completion function */
 	struct mmc_host		*host;
+	struct mmc_cmdq_req	*cmdq_req;
+	struct request *req;
 
 	/* Allow other commands during this ongoing data transfer or busy wait */
 	bool			cap_cmd_during_tfr;
@@ -118,8 +123,39 @@
 #endif
 };
 
+struct mmc_bus_ops {
+	void (*remove)(struct mmc_host *);
+	void (*detect)(struct mmc_host *);
+	int (*pre_suspend)(struct mmc_host *);
+	int (*suspend)(struct mmc_host *);
+	int (*resume)(struct mmc_host *);
+	int (*runtime_suspend)(struct mmc_host *);
+	int (*runtime_resume)(struct mmc_host *);
+	int (*runtime_idle)(struct mmc_host *);
+	int (*power_save)(struct mmc_host *);
+	int (*power_restore)(struct mmc_host *);
+	int (*alive)(struct mmc_host *);
+	int (*shutdown)(struct mmc_host *);
+	int (*reset)(struct mmc_host *);
+	int (*change_bus_speed)(struct mmc_host *, unsigned long *);
+};
+
 struct mmc_card;
 struct mmc_async_req;
+struct mmc_cmdq_req;
+
+extern int mmc_cmdq_discard_queue(struct mmc_host *host, u32 tasks);
+extern int mmc_cmdq_halt(struct mmc_host *host, bool enable);
+extern int mmc_cmdq_halt_on_empty_queue(struct mmc_host *host);
+extern void mmc_cmdq_post_req(struct mmc_host *host, int tag, int err);
+extern int mmc_cmdq_start_req(struct mmc_host *host,
+			      struct mmc_cmdq_req *cmdq_req);
+extern int mmc_cmdq_prepare_flush(struct mmc_command *cmd);
+extern int mmc_cmdq_wait_for_dcmd(struct mmc_host *host,
+			struct mmc_cmdq_req *cmdq_req);
+extern int mmc_cmdq_erase(struct mmc_cmdq_req *cmdq_req,
+	      struct mmc_card *card, unsigned int from, unsigned int nr,
+	      unsigned int arg);
 
 extern int mmc_stop_bkops(struct mmc_card *);
 extern int mmc_read_bkops_status(struct mmc_card *);
@@ -134,10 +170,15 @@
 extern int mmc_app_cmd(struct mmc_host *, struct mmc_card *);
 extern int mmc_wait_for_app_cmd(struct mmc_host *, struct mmc_card *,
 	struct mmc_command *, int);
-extern void mmc_start_bkops(struct mmc_card *card, bool from_exception);
+extern void mmc_check_bkops(struct mmc_card *card);
+extern void mmc_start_manual_bkops(struct mmc_card *card);
 extern int mmc_switch(struct mmc_card *, u8, u8, u8, unsigned int);
+extern int __mmc_switch_cmdq_mode(struct mmc_command *cmd, u8 set, u8 index,
+				  u8 value, unsigned int timeout_ms,
+				  bool use_busy_signal, bool ignore_timeout);
 extern int mmc_send_tuning(struct mmc_host *host, u32 opcode, int *cmd_error);
 extern int mmc_get_ext_csd(struct mmc_card *card, u8 **new_ext_csd);
+extern int mmc_set_auto_bkops(struct mmc_card *card, bool enable);
 
 #define MMC_ERASE_ARG		0x00000000
 #define MMC_SECURE_ERASE_ARG	0x80000000
@@ -164,6 +205,7 @@
 extern int mmc_set_blockcount(struct mmc_card *card, unsigned int blockcount,
 			      bool is_rel_write);
 extern int mmc_hw_reset(struct mmc_host *host);
+extern int mmc_cmdq_hw_reset(struct mmc_host *host);
 extern int mmc_can_reset(struct mmc_card *card);
 
 extern void mmc_set_data_timeout(struct mmc_data *, const struct mmc_card *);
@@ -174,11 +216,22 @@
 
 extern void mmc_get_card(struct mmc_card *card);
 extern void mmc_put_card(struct mmc_card *card);
+extern void __mmc_put_card(struct mmc_card *card);
 
+extern void mmc_set_ios(struct mmc_host *host);
 extern int mmc_flush_cache(struct mmc_card *);
+extern int mmc_cache_barrier(struct mmc_card *);
 
 extern int mmc_detect_card_removed(struct mmc_host *host);
 
+extern void mmc_blk_init_bkops_statistics(struct mmc_card *card);
+
+extern void mmc_deferred_scaling(struct mmc_host *host);
+extern void mmc_cmdq_clk_scaling_start_busy(struct mmc_host *host,
+	bool lock_needed);
+extern void mmc_cmdq_clk_scaling_stop_busy(struct mmc_host *host,
+	bool lock_needed, bool is_cmdq_dcmd);
+
 /**
  *	mmc_claim_host - exclusively claim a host
  *	@host: mmc host to claim
diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h
index 73bfe1a..ba1e826 100644
--- a/include/linux/mmc/host.h
+++ b/include/linux/mmc/host.h
@@ -15,6 +15,7 @@
 #include <linux/timer.h>
 #include <linux/sched.h>
 #include <linux/device.h>
+#include <linux/devfreq.h>
 #include <linux/fault-inject.h>
 #include <linux/blkdev.h>
 
@@ -22,10 +23,14 @@
 #include <linux/mmc/card.h>
 #include <linux/mmc/mmc.h>
 #include <linux/mmc/pm.h>
+#include <linux/mmc/ring_buffer.h>
+
+#define MMC_AUTOSUSPEND_DELAY_MS	3000
 
 struct mmc_ios {
 	unsigned int	clock;			/* clock rate */
 	unsigned int	old_rate;       /* saved clock rate */
+	unsigned long	clk_ts;         /* time stamp of last updated clock */
 	unsigned short	vdd;
 
 /* vdd stores the bit number of the selected voltage range from below. */
@@ -90,7 +95,19 @@
 	MMC_LOAD_LOW,
 };
 
+struct mmc_cmdq_host_ops {
+	int (*init)(struct mmc_host *host);
+	int (*enable)(struct mmc_host *host);
+	void (*disable)(struct mmc_host *host, bool soft);
+	int (*request)(struct mmc_host *host, struct mmc_request *mrq);
+	void (*post_req)(struct mmc_host *host, int tag, int err);
+	int (*halt)(struct mmc_host *host, bool halt);
+	void (*reset)(struct mmc_host *host, bool soft);
+	void (*dumpstate)(struct mmc_host *host);
+};
+
 struct mmc_host_ops {
+	int (*init)(struct mmc_host *host);
 	/*
 	 * 'enable' is called when the host is claimed and 'disable' is called
 	 * when the host is released. 'enable' and 'disable' are deprecated.
@@ -160,6 +177,7 @@
 
 	/* Prepare HS400 target operating frequency depending host driver */
 	int	(*prepare_hs400_tuning)(struct mmc_host *host, struct mmc_ios *ios);
+	int	(*enhanced_strobe)(struct mmc_host *host);
 	/* Prepare enhanced strobe depending host driver */
 	void	(*hs400_enhanced_strobe)(struct mmc_host *host,
 					 struct mmc_ios *ios);
@@ -175,12 +193,42 @@
 	 */
 	int	(*multi_io_quirk)(struct mmc_card *card,
 				  unsigned int direction, int blk_size);
+
+	unsigned long (*get_max_frequency)(struct mmc_host *host);
+	unsigned long (*get_min_frequency)(struct mmc_host *host);
+
 	int	(*notify_load)(struct mmc_host *, enum mmc_load);
+	void	(*notify_halt)(struct mmc_host *mmc, bool halt);
+	void	(*force_err_irq)(struct mmc_host *host, u64 errmask);
 };
 
 struct mmc_card;
 struct device;
 
+struct mmc_cmdq_req {
+	unsigned int cmd_flags;
+	u32 blk_addr;
+	/* active mmc request */
+	struct mmc_request	mrq;
+	struct mmc_data		data;
+	struct mmc_command	cmd;
+#define DCMD		(1 << 0)
+#define QBR		(1 << 1)
+#define DIR		(1 << 2)
+#define PRIO		(1 << 3)
+#define REL_WR		(1 << 4)
+#define DAT_TAG	(1 << 5)
+#define FORCED_PRG	(1 << 6)
+	unsigned int		cmdq_req_flags;
+
+	unsigned int		resp_idx;
+	unsigned int		resp_arg;
+	unsigned int		dev_pend_tasks;
+	bool			resp_err;
+	int			tag; /* used for command queuing */
+	u8			ctx_id;
+};
+
 struct mmc_async_req {
 	/* active mmc request */
 	struct mmc_request	*mrq;
@@ -207,6 +255,33 @@
 	void *handler_priv;
 };
 
+
+/**
+ * mmc_cmdq_context_info - describes the contexts of cmdq
+ * @active_reqs		requests being processed
+ * @data_active_reqs	data requests being processed
+ * @curr_state		state of cmdq engine
+ * @cmdq_ctx_lock	acquire this before accessing this structure
+ * @queue_empty_wq	workqueue for waiting for all
+ *			the outstanding requests to be completed
+ * @wait		waiting for all conditions described in
+ *			mmc_cmdq_ready_wait to be satisified before
+ *			issuing the new request to LLD.
+ */
+struct mmc_cmdq_context_info {
+	unsigned long	active_reqs; /* in-flight requests */
+	unsigned long	data_active_reqs; /* in-flight data requests */
+	unsigned long	curr_state;
+#define	CMDQ_STATE_ERR 0
+#define	CMDQ_STATE_DCMD_ACTIVE 1
+#define	CMDQ_STATE_HALT 2
+#define	CMDQ_STATE_CQ_DISABLE 3
+#define	CMDQ_STATE_REQ_TIMED_OUT 4
+	wait_queue_head_t	queue_empty_wq;
+	wait_queue_head_t	wait;
+	int active_small_sector_read_reqs;
+};
+
 /**
  * mmc_context_info - synchronization details for mmc context
  * @is_done_rcv		wake up reason was done request
@@ -231,11 +306,68 @@
 	struct regulator *vqmmc;	/* Optional Vccq supply */
 };
 
+enum dev_state {
+	DEV_SUSPENDING = 1,
+	DEV_SUSPENDED,
+	DEV_RESUMED,
+};
+
+/**
+ * struct mmc_devfeq_clk_scaling - main context for MMC clock scaling logic
+ *
+ * @lock: spinlock to protect statistics
+ * @devfreq: struct that represent mmc-host as a client for devfreq
+ * @devfreq_profile: MMC device profile, mostly polling interval and callbacks
+ * @ondemand_gov_data: struct supplied to ondemmand governor (thresholds)
+ * @state: load state, can be HIGH or LOW. used to notify mmc_host_ops callback
+ * @start_busy: timestamped armed once a data request is started
+ * @measure_interval_start: timestamped armed once a measure interval started
+ * @devfreq_abort: flag to sync between different contexts relevant to devfreq
+ * @skip_clk_scale_freq_update: flag that enable/disable frequency change
+ * @freq_table_sz: table size of frequencies supplied to devfreq
+ * @freq_table: frequencies table supplied to devfreq
+ * @curr_freq: current frequency
+ * @polling_delay_ms: polling interval for status collection used by devfreq
+ * @upthreshold: up-threshold supplied to ondemand governor
+ * @downthreshold: down-threshold supplied to ondemand governor
+ * @need_freq_change: flag indicating if a frequency change is required
+ * @clk_scaling_in_progress: flag indicating if there's ongoing frequency change
+ * @is_busy_started: flag indicating if a request is handled by the HW
+ * @enable: flag indicating if the clock scaling logic is enabled for this host
+ */
+struct mmc_devfeq_clk_scaling {
+	spinlock_t	lock;
+	struct		devfreq *devfreq;
+	struct		devfreq_dev_profile devfreq_profile;
+	struct		devfreq_simple_ondemand_data ondemand_gov_data;
+	enum mmc_load	state;
+	ktime_t		start_busy;
+	ktime_t		measure_interval_start;
+	atomic_t	devfreq_abort;
+	bool		skip_clk_scale_freq_update;
+	int		freq_table_sz;
+	u32		*freq_table;
+	unsigned long	total_busy_time_us;
+	unsigned long	target_freq;
+	unsigned long	curr_freq;
+	unsigned long	polling_delay_ms;
+	unsigned int	upthreshold;
+	unsigned int	downthreshold;
+	unsigned int	lower_bus_speed_mode;
+#define MMC_SCALING_LOWER_DDR52_MODE	1
+	bool		need_freq_change;
+	bool		clk_scaling_in_progress;
+	bool		is_busy_started;
+	bool		enable;
+};
+
 struct mmc_host {
 	struct device		*parent;
 	struct device		class_dev;
+	struct mmc_devfeq_clk_scaling	clk_scaling;
 	int			index;
 	const struct mmc_host_ops *ops;
+	const struct mmc_cmdq_host_ops *cmdq_ops;
 	struct mmc_pwrseq	*pwrseq;
 	unsigned int		f_min;
 	unsigned int		f_max;
@@ -331,6 +463,13 @@
 #define MMC_CAP2_CLK_SCALE	(1 << 24)	/* Allow dynamic clk scaling */
 #define MMC_CAP2_ASYNC_SDIO_IRQ_4BIT_MODE (1 << 25)	/* Allows Asynchronous SDIO irq while card is in 4-bit mode */
 #define MMC_CAP2_NONHOTPLUG	(1 << 26)	/*Don't support hotplug*/
+/* Some hosts need additional tuning */
+#define MMC_CAP2_HS400_POST_TUNING	(1 << 27)
+#define MMC_CAP2_CMD_QUEUE	(1 << 28)	/* support eMMC command queue */
+#define MMC_CAP2_SANITIZE       (1 << 29)               /* Support Sanitize */
+#define MMC_CAP2_SLEEP_AWAKE	(1 << 30)	/* Use Sleep/Awake (CMD5) */
+/* use max discard ignoring max_busy_timeout parameter */
+#define MMC_CAP2_MAX_DISCARD_SIZE	(1 << 31)
 
 	mmc_pm_flag_t		pm_caps;	/* supported pm features */
 
@@ -359,6 +498,7 @@
 	spinlock_t		lock;		/* lock for claim and bus ops */
 
 	struct mmc_ios		ios;		/* current io bus settings */
+	struct mmc_ios		cached_ios;
 
 	/* group bitfields together to minimize padding */
 	unsigned int		use_spi_crc:1;
@@ -386,6 +526,7 @@
 
 	wait_queue_head_t	wq;
 	struct task_struct	*claimer;	/* task that has host claimed */
+	struct task_struct	*suspend_task;
 	int			claim_cnt;	/* "claim" nesting count */
 
 	struct delayed_work	detect;
@@ -395,6 +536,10 @@
 	const struct mmc_bus_ops *bus_ops;	/* current bus driver */
 	unsigned int		bus_refs;	/* reference counter */
 
+	unsigned int		bus_resume_flags;
+#define MMC_BUSRESUME_MANUAL_RESUME	(1 << 0)
+#define MMC_BUSRESUME_NEEDS_RESUME	(1 << 1)
+
 	unsigned int		sdio_irqs;
 	struct task_struct	*sdio_irq_thread;
 	bool			sdio_irq_pending;
@@ -447,6 +592,33 @@
 	 * actually disabling the clock from it's source.
 	 */
 	bool			card_clock_off;
+
+#ifdef CONFIG_MMC_PERF_PROFILING
+	struct {
+
+		unsigned long rbytes_drv;  /* Rd bytes MMC Host  */
+		unsigned long wbytes_drv;  /* Wr bytes MMC Host  */
+		ktime_t rtime_drv;	   /* Rd time  MMC Host  */
+		ktime_t wtime_drv;	   /* Wr time  MMC Host  */
+		ktime_t start;
+	} perf;
+	bool perf_enable;
+#endif
+	struct mmc_trace_buffer trace_buf;
+	enum dev_state dev_status;
+	bool			wakeup_on_idle;
+	struct mmc_cmdq_context_info	cmdq_ctx;
+	int num_cq_slots;
+	int dcmd_cq_slot;
+	bool			cmdq_thist_enabled;
+	/*
+	 * several cmdq supporting host controllers are extensions
+	 * of legacy controllers. This variable can be used to store
+	 * a reference to the cmdq extension of the existing host
+	 * controller.
+	 */
+	void *cmdq_private;
+	struct mmc_request	*err_mrq;
 	unsigned long		private[0] ____cacheline_aligned;
 };
 
@@ -470,11 +642,30 @@
 	return (void *)host->private;
 }
 
+static inline void *mmc_cmdq_private(struct mmc_host *host)
+{
+	return host->cmdq_private;
+}
+
 #define mmc_host_is_spi(host)	((host)->caps & MMC_CAP_SPI)
 
 #define mmc_dev(x)	((x)->parent)
 #define mmc_classdev(x)	(&(x)->class_dev)
 #define mmc_hostname(x)	(dev_name(&(x)->class_dev))
+#define mmc_bus_needs_resume(host) ((host)->bus_resume_flags & \
+				    MMC_BUSRESUME_NEEDS_RESUME)
+#define mmc_bus_manual_resume(host) ((host)->bus_resume_flags & \
+				MMC_BUSRESUME_MANUAL_RESUME)
+
+static inline void mmc_set_bus_resume_policy(struct mmc_host *host, int manual)
+{
+	if (manual)
+		host->bus_resume_flags |= MMC_BUSRESUME_MANUAL_RESUME;
+	else
+		host->bus_resume_flags &= ~MMC_BUSRESUME_MANUAL_RESUME;
+}
+
+extern int mmc_resume_bus(struct mmc_host *host);
 
 int mmc_power_save_host(struct mmc_host *host);
 int mmc_power_restore_host(struct mmc_host *host);
@@ -546,6 +737,12 @@
 	return !(host->caps2 & MMC_CAP2_BOOTPART_NOACC);
 }
 
+static inline bool mmc_card_and_host_support_async_int(struct mmc_host *host)
+{
+	return ((host->caps2 & MMC_CAP2_ASYNC_SDIO_IRQ_4BIT_MODE) &&
+			(host->card->cccr.async_intr_sup));
+}
+
 static inline int mmc_host_uhs(struct mmc_host *host)
 {
 	return host->caps &
@@ -559,6 +756,36 @@
 	return host->caps2 & MMC_CAP2_PACKED_WR;
 }
 
+static inline void mmc_host_set_halt(struct mmc_host *host)
+{
+	set_bit(CMDQ_STATE_HALT, &host->cmdq_ctx.curr_state);
+}
+
+static inline void mmc_host_clr_halt(struct mmc_host *host)
+{
+	clear_bit(CMDQ_STATE_HALT, &host->cmdq_ctx.curr_state);
+}
+
+static inline int mmc_host_halt(struct mmc_host *host)
+{
+	return test_bit(CMDQ_STATE_HALT, &host->cmdq_ctx.curr_state);
+}
+
+static inline void mmc_host_set_cq_disable(struct mmc_host *host)
+{
+	set_bit(CMDQ_STATE_CQ_DISABLE, &host->cmdq_ctx.curr_state);
+}
+
+static inline void mmc_host_clr_cq_disable(struct mmc_host *host)
+{
+	clear_bit(CMDQ_STATE_CQ_DISABLE, &host->cmdq_ctx.curr_state);
+}
+
+static inline int mmc_host_cq_disable(struct mmc_host *host)
+{
+	return test_bit(CMDQ_STATE_CQ_DISABLE, &host->cmdq_ctx.curr_state);
+}
+
 #ifdef CONFIG_MMC_CLKGATE
 void mmc_host_clk_hold(struct mmc_host *host);
 void mmc_host_clk_release(struct mmc_host *host);
diff --git a/include/linux/mmc/mmc.h b/include/linux/mmc/mmc.h
index 68f60b8..2186a36 100644
--- a/include/linux/mmc/mmc.h
+++ b/include/linux/mmc/mmc.h
@@ -165,6 +165,7 @@
  * OCR bits are mostly in host.h
  */
 #define MMC_CARD_BUSY	0x80000000	/* Card Power up status bit */
+#define MMC_CARD_SECTOR_ADDR 0x40000000 /* Card supports sectors */
 
 /*
  * Card Command Classes (CCC)
@@ -214,6 +215,8 @@
  * EXT_CSD fields
  */
 
+#define EXT_CSD_CMDQ			15	/* R/W */
+#define EXT_CSD_BARRIER_CTRL		31      /* R/W */
 #define EXT_CSD_FLUSH_CACHE		32      /* W */
 #define EXT_CSD_CACHE_CTRL		33      /* R/W */
 #define EXT_CSD_POWER_OFF_NOTIFICATION	34	/* R/W */
@@ -267,6 +270,7 @@
 #define EXT_CSD_PWR_CL_200_360		237	/* RO */
 #define EXT_CSD_PWR_CL_DDR_52_195	238	/* RO */
 #define EXT_CSD_PWR_CL_DDR_52_360	239	/* RO */
+#define EXT_CSD_CACHE_FLUSH_POLICY	240	/* RO */
 #define EXT_CSD_BKOPS_STATUS		246	/* RO */
 #define EXT_CSD_POWER_OFF_LONG_TIME	247	/* RO */
 #define EXT_CSD_GENERIC_CMD6_TIME	248	/* RO */
@@ -276,6 +280,9 @@
 #define EXT_CSD_PRE_EOL_INFO		267	/* RO */
 #define EXT_CSD_DEVICE_LIFE_TIME_EST_TYP_A	268	/* RO */
 #define EXT_CSD_DEVICE_LIFE_TIME_EST_TYP_B	269	/* RO */
+#define EXT_CSD_CMDQ_DEPTH		307	/* RO */
+#define EXT_CSD_CMDQ_SUPPORT		308	/* RO */
+#define EXT_CSD_BARRIER_SUPPORT		486	/* RO */
 #define EXT_CSD_SUPPORTED_MODE		493	/* RO */
 #define EXT_CSD_TAG_UNIT_SIZE		498	/* RO */
 #define EXT_CSD_DATA_TAG_SUPPORT	499	/* RO */
@@ -288,7 +295,8 @@
  * EXT_CSD field definitions
  */
 
-#define EXT_CSD_WR_REL_PARAM_EN		(1<<2)
+#define EXT_CSD_WR_REL_PARAM_EN			(1<<2)
+#define EXT_CSD_WR_REL_PARAM_EN_RPMB_REL_WR	(1<<4)
 
 #define EXT_CSD_BOOT_WP_B_PWR_WP_DIS	(0x40)
 #define EXT_CSD_BOOT_WP_B_PERM_WP_DIS	(0x10)
@@ -361,6 +369,9 @@
 
 #define EXT_CSD_PACKED_EVENT_EN	BIT(3)
 
+#define EXT_CSD_BKOPS_MANUAL_EN		BIT(0)
+#define EXT_CSD_BKOPS_AUTO_EN		BIT(1)
+
 /*
  * EXCEPTION_EVENT_STATUS field
  */
diff --git a/include/linux/mmc/ring_buffer.h b/include/linux/mmc/ring_buffer.h
new file mode 100644
index 0000000..e6bf163
--- /dev/null
+++ b/include/linux/mmc/ring_buffer.h
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2017, The Linux Foundation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 and
+ * only version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ */
+
+#ifndef __MMC_RING_BUFFER__
+#define __MMC_RING_BUFFER__
+
+#include <linux/mmc/card.h>
+#include <linux/smp.h>
+
+#include "core.h"
+
+#define MMC_TRACE_RBUF_SZ_ORDER	2	/* 2^2 pages */
+#define MMC_TRACE_RBUF_SZ	(PAGE_SIZE * (1 << MMC_TRACE_RBUF_SZ_ORDER))
+#define MMC_TRACE_EVENT_SZ	256
+#define MMC_TRACE_RBUF_NUM_EVENTS	(MMC_TRACE_RBUF_SZ / MMC_TRACE_EVENT_SZ)
+
+struct mmc_host;
+struct mmc_trace_buffer {
+	int	wr_idx;
+	bool stop_tracing;
+	spinlock_t trace_lock;
+	char *data;
+};
+
+#ifdef CONFIG_MMC_RING_BUFFER
+void mmc_stop_tracing(struct mmc_host *mmc);
+void mmc_trace_write(struct mmc_host *mmc, const char *fmt, ...);
+void mmc_trace_init(struct mmc_host *mmc);
+void mmc_trace_free(struct mmc_host *mmc);
+void mmc_dump_trace_buffer(struct mmc_host *mmc, struct seq_file *s);
+#else
+static inline void mmc_stop_tracing(struct mmc_host *mmc) {}
+static inline void mmc_trace_write(struct mmc_host *mmc,
+		const char *fmt, ...) {}
+static inline void mmc_trace_init(struct mmc_host *mmc) {}
+static inline void mmc_trace_free(struct mmc_host *mmc) {}
+static inline void mmc_dump_trace_buffer(struct mmc_host *mmc,
+		struct seq_file *s) {}
+#endif
+
+#define MMC_TRACE(mmc, fmt, ...) \
+		mmc_trace_write(mmc, fmt, ##__VA_ARGS__)
+
+#endif /* __MMC_RING_BUFFER__ */
diff --git a/include/linux/mmc/sdio.h b/include/linux/mmc/sdio.h
index 17446d3..3fc07d7 100644
--- a/include/linux/mmc/sdio.h
+++ b/include/linux/mmc/sdio.h
@@ -102,6 +102,7 @@
 #define  SDIO_BUS_WIDTH_1BIT	0x00
 #define  SDIO_BUS_WIDTH_RESERVED 0x01
 #define  SDIO_BUS_WIDTH_4BIT	0x02
+#define  SDIO_BUS_WIDTH_8BIT  	0x03
 #define  SDIO_BUS_ECSI		0x20	/* Enable continuous SPI interrupt */
 #define  SDIO_BUS_SCSI		0x40	/* Support continuous SPI interrupt */
 
@@ -163,6 +164,10 @@
 #define  SDIO_DTSx_SET_TYPE_A	(1 << SDIO_DRIVE_DTSx_SHIFT)
 #define  SDIO_DTSx_SET_TYPE_C	(2 << SDIO_DRIVE_DTSx_SHIFT)
 #define  SDIO_DTSx_SET_TYPE_D	(3 << SDIO_DRIVE_DTSx_SHIFT)
+
+#define SDIO_CCCR_INTERRUPT_EXTENSION	0x16
+#define	SDIO_SUPPORT_ASYNC_INTR		(1<<0)
+#define	SDIO_ENABLE_ASYNC_INTR		(1<<1)
 /*
  * Function Basic Registers (FBR)
  */
diff --git a/include/trace/events/mmc.h b/include/trace/events/mmc.h
index 379594b..917b3f0 100644
--- a/include/trace/events/mmc.h
+++ b/include/trace/events/mmc.h
@@ -247,7 +247,81 @@
 	TP_PROTO(unsigned int cmd, unsigned int len),
 	TP_ARGS(cmd, len));
 
-#endif /* _TRACE_MMC_H */
+TRACE_EVENT(mmc_clk,
+	TP_PROTO(char *print_info),
+
+	TP_ARGS(print_info),
+
+	TP_STRUCT__entry(
+		__string(print_info, print_info)
+	),
+
+	TP_fast_assign(
+		__assign_str(print_info, print_info);
+	),
+
+	TP_printk("%s",
+		__get_str(print_info)
+	)
+);
+
+DECLARE_EVENT_CLASS(mmc_pm_template,
+	TP_PROTO(const char *dev_name, int err, s64 usecs),
+
+	TP_ARGS(dev_name, err, usecs),
+
+	TP_STRUCT__entry(
+		__field(s64, usecs)
+		__field(int, err)
+		__string(dev_name, dev_name)
+	),
+
+	TP_fast_assign(
+		__entry->usecs = usecs;
+		__entry->err = err;
+		__assign_str(dev_name, dev_name);
+	),
+
+	TP_printk(
+		"took %lld usecs, %s err %d",
+		__entry->usecs,
+		__get_str(dev_name),
+		__entry->err
+	)
+);
+
+DEFINE_EVENT(mmc_pm_template, mmc_runtime_suspend,
+	     TP_PROTO(const char *dev_name, int err, s64 usecs),
+	     TP_ARGS(dev_name, err, usecs));
+
+DEFINE_EVENT(mmc_pm_template, mmc_runtime_resume,
+	     TP_PROTO(const char *dev_name, int err, s64 usecs),
+	     TP_ARGS(dev_name, err, usecs));
+
+DEFINE_EVENT(mmc_pm_template, mmc_suspend,
+	     TP_PROTO(const char *dev_name, int err, s64 usecs),
+	     TP_ARGS(dev_name, err, usecs));
+
+DEFINE_EVENT(mmc_pm_template, mmc_resume,
+	     TP_PROTO(const char *dev_name, int err, s64 usecs),
+	     TP_ARGS(dev_name, err, usecs));
+
+DEFINE_EVENT(mmc_pm_template, sdhci_msm_suspend,
+	     TP_PROTO(const char *dev_name, int err, s64 usecs),
+	     TP_ARGS(dev_name, err, usecs));
+
+DEFINE_EVENT(mmc_pm_template, sdhci_msm_resume,
+	     TP_PROTO(const char *dev_name, int err, s64 usecs),
+	     TP_ARGS(dev_name, err, usecs));
+
+DEFINE_EVENT(mmc_pm_template, sdhci_msm_runtime_suspend,
+	     TP_PROTO(const char *dev_name, int err, s64 usecs),
+	     TP_ARGS(dev_name, err, usecs));
+
+DEFINE_EVENT(mmc_pm_template, sdhci_msm_runtime_resume,
+	     TP_PROTO(const char *dev_name, int err, s64 usecs),
+	     TP_ARGS(dev_name, err, usecs));
+#endif /* if !defined(_TRACE_MMC_H) || defined(TRACE_HEADER_MULTI_READ) */
 
 /* This part must be outside protection */
 #include <trace/define_trace.h>
diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h
index fda50e9..eb18389 100644
--- a/include/uapi/drm/msm_drm.h
+++ b/include/uapi/drm/msm_drm.h
@@ -316,6 +316,7 @@
 #define DRM_SDE_WB_CONFIG              0x40
 #define DRM_MSM_REGISTER_EVENT         0x41
 #define DRM_MSM_DEREGISTER_EVENT       0x42
+#define DRM_MSM_RMFB2                  0x43
 
 /* sde custom events */
 #define DRM_EVENT_HISTOGRAM 0x80000000
@@ -335,6 +336,8 @@
 			DRM_MSM_REGISTER_EVENT), struct drm_msm_event_req)
 #define DRM_IOCTL_MSM_DEREGISTER_EVENT DRM_IOW((DRM_COMMAND_BASE + \
 			DRM_MSM_DEREGISTER_EVENT), struct drm_msm_event_req)
+#define DRM_IOCTL_MSM_RMFB2 DRM_IOW((DRM_COMMAND_BASE + \
+			DRM_MSM_RMFB2), unsigned int)
 
 #if defined(__cplusplus)
 }
diff --git a/include/uapi/linux/mmc/ioctl.h b/include/uapi/linux/mmc/ioctl.h
index 7e385b8..8fec144 100644
--- a/include/uapi/linux/mmc/ioctl.h
+++ b/include/uapi/linux/mmc/ioctl.h
@@ -63,6 +63,61 @@
  *	commands in array in sequence to card.
  */
 #define MMC_IOC_MULTI_CMD _IOWR(MMC_BLOCK_MAJOR, 1, struct mmc_ioc_multi_cmd)
+
+/**
+ * There are four request types that are applicable for rpmb accesses- two
+ * under read category and two under write. They are
+ *
+ *  Reads
+ *  -------
+ *  1. Read Write Counter
+ *  2. Authenticated data read
+ *
+ *
+ *  Writes
+ *  -------
+ *  1. Provision RPMB key (though it might be done in a secure environment)
+ *  2. Authenticated data write
+ *
+ *  While its given that the rpmb data frames are going to have that
+ *  information encoded in it and the frames should be generated by a secure
+ *  piece of code, the request types can be classified as above.
+ *
+ *  So here are the set of commands that should be executed atomically in the
+ *  ioctl for rpmb read operation
+ *  1. Switch partition
+ *  2. Set block count
+ *  3. Write data frame - CMD25 to write the rpmb data frame
+ *  4. Set block count
+ *  5. Read the data - CMD18 to do the actual read
+ *
+ *  Similarly for rpmb write operation, these are the commands that should be
+ *  executed atomically in the ioctl for rpmb write operation
+ *  1. Switch partition
+ *  2. Set block count
+ *  3. Write data frame - CMD25 to write the rpmb data frame with data
+ *  4. Set block count
+ *  5. Read the data - CMD25 to write rpmb data frame indicating that rpmb
+ *     result register is about to be read
+ *  6. Set block count
+ *  7. Read rpmb result - CMD18 to read the rpmb result register
+ *
+ * Each of the above commands should be sent individually via struct mmc_ioc_cmd
+ * and fields like is_acmd that are not needed for rpmb operations will be
+ * ignored.
+ */
+#define MMC_IOC_MAX_RPMB_CMD	3
+struct mmc_ioc_rpmb {
+	struct mmc_ioc_cmd cmds[MMC_IOC_MAX_RPMB_CMD];
+}
+;
+/*
+ * This ioctl is meant for use with rpmb partitions. This is needed since the
+ * access procedure for this particular partition is different from regular
+ * or normal partitions.
+ */
+#define MMC_IOC_RPMB_CMD _IOWR(MMC_BLOCK_MAJOR, 0, struct mmc_ioc_rpmb)
+
 /*
  * Since this ioctl is only meant to enhance (and not replace) normal access
  * to the mmc bus device, an upper data transfer limit of MMC_IOC_MAX_BYTES
diff --git a/include/uapi/linux/mmc/mmc.h b/include/uapi/linux/mmc/mmc.h
index f75ae94..8de329b 100644
--- a/include/uapi/linux/mmc/mmc.h
+++ b/include/uapi/linux/mmc/mmc.h
@@ -29,6 +29,7 @@
 #define MMC_READ_MULTIPLE_BLOCK  18   /* adtc [31:0] data addr   R1  */
 #define MMC_SEND_TUNING_BLOCK    19   /* adtc                    R1  */
 #define MMC_SEND_TUNING_BLOCK_HS200	21	/* adtc R1  */
+#define MMC_SEND_TUNING_BLOCK_HS400      MMC_SEND_TUNING_BLOCK_HS200
 
 #define MMC_TUNING_BLK_PATTERN_4BIT_SIZE	 64
 #define MMC_TUNING_BLK_PATTERN_8BIT_SIZE	128
@@ -64,4 +65,9 @@
 #define MMC_APP_CMD              55   /* ac   [31:16] RCA        R1  */
 #define MMC_GEN_CMD              56   /* adtc [0] RD/WR          R1  */
 
+/* class 11 */
+#define MMC_CMDQ_TASK_MGMT       48  /* ac   [31:0] task ID     R1b */
+#define DISCARD_QUEUE		0x1
+#define DISCARD_TASK		0x2
+
 #endif /* UAPI_MMC_MMC_H */
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 5f98592..9055429 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -1165,7 +1165,7 @@
 static void update_rq_stats(void)
 {
 	unsigned long jiffy_gap = 0;
-	unsigned int rq_avg = 0;
+	unsigned long long rq_avg = 0;
 	unsigned long flags = 0;
 
 	jiffy_gap = jiffies - rq_info.rq_poll_last_jiffy;
diff --git a/sound/soc/msm/qdsp6v2/msm-pcm-voice-v2.c b/sound/soc/msm/qdsp6v2/msm-pcm-voice-v2.c
index e39e642..826f566 100644
--- a/sound/soc/msm/qdsp6v2/msm-pcm-voice-v2.c
+++ b/sound/soc/msm/qdsp6v2/msm-pcm-voice-v2.c
@@ -394,12 +394,13 @@
 					struct snd_ctl_elem_value *ucontrol)
 {
 	int ret;
-	bool sidetone_enable = ucontrol->value.integer.value[0];
+	long value = ucontrol->value.integer.value[0];
+	bool sidetone_enable = value;
 	uint32_t session_id = ALL_SESSION_VSID;
 
-	if (sidetone_enable < 0) {
-		pr_err("%s: Invalid arguments sidetone enable %d\n",
-			 __func__, sidetone_enable);
+	if (value < 0) {
+		pr_err("%s: Invalid arguments sidetone enable %ld\n",
+			 __func__, value);
 		ret = -EINVAL;
 		return ret;
 	}