Merge branch 'for_linus' of git://cavan.codon.org.uk/platform-drivers-x86

Pull x86 platform driver revert from Matthew Garrett:
 "This clearly shouldn't have been merged.  No excuse on my part"

* 'for_linus' of git://cavan.codon.org.uk/platform-drivers-x86:
  Revert "platform/x86/toshiba-apci.c possible bad if test?"
diff --git a/Documentation/devicetree/bindings/interrupt-controller/interrupts.txt b/Documentation/devicetree/bindings/interrupt-controller/interrupts.txt
index 1486497..ce6a1a0 100644
--- a/Documentation/devicetree/bindings/interrupt-controller/interrupts.txt
+++ b/Documentation/devicetree/bindings/interrupt-controller/interrupts.txt
@@ -4,11 +4,13 @@
 1) Interrupt client nodes
 -------------------------
 
-Nodes that describe devices which generate interrupts must contain an either an
-"interrupts" property or an "interrupts-extended" property. These properties
-contain a list of interrupt specifiers, one per output interrupt. The format of
-the interrupt specifier is determined by the interrupt controller to which the
-interrupts are routed; see section 2 below for details.
+Nodes that describe devices which generate interrupts must contain an
+"interrupts" property, an "interrupts-extended" property, or both. If both are
+present, the latter should take precedence; the former may be provided simply
+for compatibility with software that does not recognize the latter. These
+properties contain a list of interrupt specifiers, one per output interrupt. The
+format of the interrupt specifier is determined by the interrupt controller to
+which the interrupts are routed; see section 2 below for details.
 
   Example:
 	interrupt-parent = <&intc1>;
diff --git a/Documentation/devicetree/bindings/pci/designware-pcie.txt b/Documentation/devicetree/bindings/pci/designware-pcie.txt
index d0d15ee..ed0d9b9 100644
--- a/Documentation/devicetree/bindings/pci/designware-pcie.txt
+++ b/Documentation/devicetree/bindings/pci/designware-pcie.txt
@@ -2,6 +2,10 @@
 
 Required properties:
 - compatible: should contain "snps,dw-pcie" to identify the core.
+- reg: Should contain the configuration address space.
+- reg-names: Must be "config" for the PCIe configuration space.
+    (The old way of getting the configuration address space from "ranges"
+    is deprecated and should be avoided.)
 - #address-cells: set to <3>
 - #size-cells: set to <2>
 - device_type: set to "pci"
diff --git a/Documentation/devicetree/bindings/pci/ti-pci.txt b/Documentation/devicetree/bindings/pci/ti-pci.txt
new file mode 100644
index 0000000..3d21791
--- /dev/null
+++ b/Documentation/devicetree/bindings/pci/ti-pci.txt
@@ -0,0 +1,59 @@
+TI PCI Controllers
+
+PCIe Designware Controller
+ - compatible: Should be "ti,dra7-pcie"
+ - reg : Three register ranges as listed in the reg-names property
+ - reg-names : Must contain "rc_dbics" for the DesignWare PCIe registers,
+	       "ti_conf" for the TI-specific registers and "config" for the
+	       PCIe configuration space (the driver looks these entries up
+	       by name, as the example below shows)
+ - phys : list of PHY specifiers (used by generic PHY framework)
+ - phy-names : must be "pcie-phy0", "pcie-phy1", ..., "pcie-phyN", based on
+	       the number of PHYs given in the *phys* property.
+ - ti,hwmods : Name of the hwmod associated with the PCIe instance, "pcie<X>",
+	       where <X> is the instance number of the controller in the HW spec.
+ - interrupts : Two interrupt entries must be specified. The first is for the
+		main interrupt line and the second for the MSI interrupt line.
+ - #address-cells,
+   #size-cells,
+   #interrupt-cells,
+   device_type,
+   ranges,
+   num-lanes,
+   interrupt-map-mask,
+   interrupt-map : as specified in ../designware-pcie.txt
+
+Example:
+axi {
+	compatible = "simple-bus";
+	#size-cells = <1>;
+	#address-cells = <1>;
+	ranges = <0x51000000 0x51000000 0x3000
+		  0x0	     0x20000000 0x10000000>;
+	pcie@51000000 {
+		compatible = "ti,dra7-pcie";
+		reg = <0x51000000 0x2000>, <0x51002000 0x14c>, <0x1000 0x2000>;
+		reg-names = "rc_dbics", "ti_conf", "config";
+		interrupts = <0 232 0x4>, <0 233 0x4>;
+		#address-cells = <3>;
+		#size-cells = <2>;
+		device_type = "pci";
+		ranges = <0x81000000 0 0          0x03000 0 0x00010000
+			  0x82000000 0 0x20013000 0x13000 0 0xffed000>;
+		#interrupt-cells = <1>;
+		num-lanes = <1>;
+		ti,hwmods = "pcie1";
+		phys = <&pcie1_phy>;
+		phy-names = "pcie-phy0";
+		interrupt-map-mask = <0 0 0 7>;
+		interrupt-map = <0 0 0 1 &pcie_intc 1>,
+				<0 0 0 2 &pcie_intc 2>,
+				<0 0 0 3 &pcie_intc 3>,
+				<0 0 0 4 &pcie_intc 4>;
+		pcie_intc: interrupt-controller {
+			interrupt-controller;
+			#address-cells = <0>;
+			#interrupt-cells = <1>;
+		};
+	};
+};
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index b18dd17..f1997e9 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -349,7 +349,11 @@
 locking rules:
 			inode->i_lock	may block
 fl_copy_lock:		yes		no
-fl_release_private:	maybe		no
+fl_release_private:	maybe		maybe[1]
+
+[1]:	->fl_release_private for flock or POSIX locks is currently allowed
+to block. Leases, however, can still be freed while the i_lock is held and
+so fl_release_private called on a lease should not block.
 
 ----------------------- lock_manager_operations ---------------------------
 prototypes:
diff --git a/MAINTAINERS b/MAINTAINERS
index aefa948..05dc39d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6858,6 +6858,14 @@
 F:	Documentation/devicetree/bindings/pci/nvidia,tegra20-pcie.txt
 F:	drivers/pci/host/pci-tegra.c
 
+PCI DRIVER FOR TI DRA7XX
+M:	Kishon Vijay Abraham I <kishon@ti.com>
+L:	linux-omap@vger.kernel.org
+L:	linux-pci@vger.kernel.org
+S:	Supported
+F:	Documentation/devicetree/bindings/pci/ti-pci.txt
+F:	drivers/pci/host/pci-dra7xx.c
+
 PCI DRIVER FOR RENESAS R-CAR
 M:	Simon Horman <horms@verge.net.au>
 L:	linux-pci@vger.kernel.org
diff --git a/Makefile b/Makefile
index 6aace67..e432442 100644
--- a/Makefile
+++ b/Makefile
@@ -1,7 +1,7 @@
 VERSION = 3
-PATCHLEVEL = 16
+PATCHLEVEL = 17
 SUBLEVEL = 0
-EXTRAVERSION =
+EXTRAVERSION = -rc1
 NAME = Shuffling Zombie Juror
 
 # *DOCUMENTATION*
diff --git a/arch/frv/include/asm/processor.h b/arch/frv/include/asm/processor.h
index a34f309..6554e78 100644
--- a/arch/frv/include/asm/processor.h
+++ b/arch/frv/include/asm/processor.h
@@ -129,7 +129,8 @@
 #define	KSTK_EIP(tsk)	((tsk)->thread.frame0->pc)
 #define	KSTK_ESP(tsk)	((tsk)->thread.frame0->sp)
 
-#define cpu_relax()    barrier()
+#define cpu_relax() barrier()
+#define cpu_relax_lowlatency() cpu_relax()
 
 /* data cache prefetch */
 #define ARCH_HAS_PREFETCH
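
The frv hunk wires up cpu_relax_lowlatency(), a new arch hook consumed by the
3.17 locking code on its optimistic-spin paths, where a cheaper relax than a
full cpu_relax() can matter on some architectures. A minimal sketch of such a
spin loop (the lock->owner field and loop shape are illustrative, not the
exact kernel code):

	/* Spin while the lock owner is unchanged, backing off politely. */
	while (ACCESS_ONCE(lock->owner) == owner) {
		if (need_resched())
			break;			/* give the CPU back */
		cpu_relax_lowlatency();		/* cheap busy-wait hint */
	}
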
diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c
index 329d7fd..b9615ba 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -101,7 +101,7 @@
 	ri = kmalloc(sizeof(struct kvm_rma_info), GFP_KERNEL);
 	if (!ri)
 		return NULL;
-	page = cma_alloc(kvm_cma, kvm_rma_pages, get_order(kvm_rma_pages));
+	page = cma_alloc(kvm_cma, kvm_rma_pages, order_base_2(kvm_rma_pages));
 	if (!page)
 		goto err_out;
 	atomic_set(&ri->use_count, 1);
@@ -135,12 +135,12 @@
 {
 	unsigned long align_pages = HPT_ALIGN_PAGES;
 
-	VM_BUG_ON(get_order(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
+	VM_BUG_ON(order_base_2(nr_pages) < KVM_CMA_CHUNK_ORDER - PAGE_SHIFT);
 
 	/* Old CPUs require HPT aligned on a multiple of its size */
 	if (!cpu_has_feature(CPU_FTR_ARCH_206))
 		align_pages = nr_pages;
-	return cma_alloc(kvm_cma, nr_pages, get_order(align_pages));
+	return cma_alloc(kvm_cma, nr_pages, order_base_2(align_pages));
 }
 EXPORT_SYMBOL_GPL(kvm_alloc_hpt);
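
The bug fixed here is a units mismatch: cma_alloc(cma, count, align) takes
its alignment as a page order, and get_order() converts a size in *bytes* to
a page order, but the callers were feeding it a page count, under-aligning
the allocation. order_base_2() takes the count directly. A worked example,
assuming 4 KiB pages and an illustrative count of 16 pages (64 KiB):

	/* get_order(16)    == 0  -- 16 is treated as a byte count      */
	/* order_base_2(16) == 4  -- log2 of the page count, as wanted  */
	page = cma_alloc(kvm_cma, 16, order_base_2(16)); /* 64 KiB aligned */
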
 
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 5724601..7c492ed 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -95,7 +95,7 @@
 #define KVM_REFILL_PAGES 25
 #define KVM_MAX_CPUID_ENTRIES 80
 #define KVM_NR_FIXED_MTRR_REGION 88
-#define KVM_NR_VAR_MTRR 10
+#define KVM_NR_VAR_MTRR 8
 
 #define ASYNC_PF_PER_VCPU 64
 
diff --git a/arch/x86/include/uapi/asm/msr-index.h b/arch/x86/include/uapi/asm/msr-index.h
index eac9e92..e21331ce 100644
--- a/arch/x86/include/uapi/asm/msr-index.h
+++ b/arch/x86/include/uapi/asm/msr-index.h
@@ -149,6 +149,9 @@
 
 #define MSR_CORE_C1_RES			0x00000660
 
+#define MSR_CC6_DEMOTION_POLICY_CONFIG	0x00000668
+#define MSR_MC6_DEMOTION_POLICY_CONFIG	0x00000669
+
 #define MSR_AMD64_MC0_MASK		0xc0010044
 
 #define MSR_IA32_MCx_CTL(x)		(MSR_IA32_MC0_CTL + 4*(x))
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 56657b0..03954f7 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -1491,9 +1491,6 @@
 			goto exception;
 		break;
 	case VCPU_SREG_CS:
-		if (in_task_switch && rpl != dpl)
-			goto exception;
-
 		if (!(seg_desc.type & 8))
 			goto exception;
 
@@ -4394,8 +4391,11 @@
 
 	ctxt->execute = opcode.u.execute;
 
+	if (unlikely(ctxt->ud) && likely(!(ctxt->d & EmulateOnUD)))
+		return EMULATION_FAILED;
+
 	if (unlikely(ctxt->d &
-		     (NotImpl|EmulateOnUD|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm))) {
+		     (NotImpl|Stack|Op3264|Sse|Mmx|Intercept|CheckPerm))) {
 		/*
 		 * These are copied unconditionally here, and checked unconditionally
 		 * in x86_emulate_insn.
@@ -4406,9 +4406,6 @@
 		if (ctxt->d & NotImpl)
 			return EMULATION_FAILED;
 
-		if (!(ctxt->d & EmulateOnUD) && ctxt->ud)
-			return EMULATION_FAILED;
-
 		if (mode == X86EMUL_MODE_PROT64 && (ctxt->d & Stack))
 			ctxt->op_bytes = 8;
 
diff --git a/drivers/char/hw_random/virtio-rng.c b/drivers/char/hw_random/virtio-rng.c
index 0027137..2e3139e 100644
--- a/drivers/char/hw_random/virtio-rng.c
+++ b/drivers/char/hw_random/virtio-rng.c
@@ -116,6 +116,7 @@
 		.cleanup = virtio_cleanup,
 		.priv = (unsigned long)vi,
 		.name = vi->name,
+		.quality = 1000,
 	};
 	vdev->priv = vi;
 
diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index 4d140bb..9b7ee7e 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -89,6 +89,7 @@
 	 * Indicate which enable bits to clear here.
 	 */
 	unsigned long auto_demotion_disable_flags;
+	bool byt_auto_demotion_disable_flag;
 	bool disable_promotion_to_c1e;
 };
 
@@ -442,6 +443,66 @@
 	{
 		.enter = NULL }
 };
+static struct cpuidle_state bdw_cstates[] = {
+	{
+		.name = "C1-BDW",
+		.desc = "MWAIT 0x00",
+		.flags = MWAIT2flg(0x00) | CPUIDLE_FLAG_TIME_VALID,
+		.exit_latency = 2,
+		.target_residency = 2,
+		.enter = &intel_idle },
+	{
+		.name = "C1E-BDW",
+		.desc = "MWAIT 0x01",
+		.flags = MWAIT2flg(0x01) | CPUIDLE_FLAG_TIME_VALID,
+		.exit_latency = 10,
+		.target_residency = 20,
+		.enter = &intel_idle },
+	{
+		.name = "C3-BDW",
+		.desc = "MWAIT 0x10",
+		.flags = MWAIT2flg(0x10) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+		.exit_latency = 40,
+		.target_residency = 100,
+		.enter = &intel_idle },
+	{
+		.name = "C6-BDW",
+		.desc = "MWAIT 0x20",
+		.flags = MWAIT2flg(0x20) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+		.exit_latency = 133,
+		.target_residency = 400,
+		.enter = &intel_idle },
+	{
+		.name = "C7s-BDW",
+		.desc = "MWAIT 0x32",
+		.flags = MWAIT2flg(0x32) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+		.exit_latency = 166,
+		.target_residency = 500,
+		.enter = &intel_idle },
+	{
+		.name = "C8-BDW",
+		.desc = "MWAIT 0x40",
+		.flags = MWAIT2flg(0x40) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+		.exit_latency = 300,
+		.target_residency = 900,
+		.enter = &intel_idle },
+	{
+		.name = "C9-BDW",
+		.desc = "MWAIT 0x50",
+		.flags = MWAIT2flg(0x50) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+		.exit_latency = 600,
+		.target_residency = 1800,
+		.enter = &intel_idle },
+	{
+		.name = "C10-BDW",
+		.desc = "MWAIT 0x60",
+		.flags = MWAIT2flg(0x60) | CPUIDLE_FLAG_TIME_VALID | CPUIDLE_FLAG_TLB_FLUSHED,
+		.exit_latency = 2600,
+		.target_residency = 7700,
+		.enter = &intel_idle },
+	{
+		.enter = NULL }
+};
 
 static struct cpuidle_state atom_cstates[] = {
 	{
@@ -613,6 +674,7 @@
 static const struct idle_cpu idle_cpu_byt = {
 	.state_table = byt_cstates,
 	.disable_promotion_to_c1e = true,
+	.byt_auto_demotion_disable_flag = true,
 };
 
 static const struct idle_cpu idle_cpu_ivb = {
@@ -630,6 +692,11 @@
 	.disable_promotion_to_c1e = true,
 };
 
+static const struct idle_cpu idle_cpu_bdw = {
+	.state_table = bdw_cstates,
+	.disable_promotion_to_c1e = true,
+};
+
 static const struct idle_cpu idle_cpu_avn = {
 	.state_table = avn_cstates,
 	.disable_promotion_to_c1e = true,
@@ -658,7 +725,10 @@
 	ICPU(0x3f, idle_cpu_hsw),
 	ICPU(0x45, idle_cpu_hsw),
 	ICPU(0x46, idle_cpu_hsw),
-	ICPU(0x4D, idle_cpu_avn),
+	ICPU(0x4d, idle_cpu_avn),
+	ICPU(0x3d, idle_cpu_bdw),
+	ICPU(0x4f, idle_cpu_bdw),
+	ICPU(0x56, idle_cpu_bdw),
 	{}
 };
 MODULE_DEVICE_TABLE(x86cpu, intel_idle_ids);
@@ -814,6 +884,11 @@
 	if (icpu->auto_demotion_disable_flags)
 		on_each_cpu(auto_demotion_disable, NULL, 1);
 
+	if (icpu->byt_auto_demotion_disable_flag) {
+		wrmsrl(MSR_CC6_DEMOTION_POLICY_CONFIG, 0);
+		wrmsrl(MSR_MC6_DEMOTION_POLICY_CONFIG, 0);
+	}
+
 	if (icpu->disable_promotion_to_c1e)	/* each-cpu is redundant */
 		on_each_cpu(c1e_promotion_disable, NULL, 1);
 
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index b08c188..6703751 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -2953,6 +2953,7 @@
 		 */
 		if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery)) {
 			end_reshape(conf);
+			close_sync(conf);
 			return 0;
 		}
 
@@ -3081,6 +3082,7 @@
 			}
 
 			r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
+			r10_bio->state = 0;
 			raise_barrier(conf, rb2 != NULL);
 			atomic_set(&r10_bio->remaining, 0);
 
@@ -3269,6 +3271,7 @@
 		if (sync_blocks < max_sync)
 			max_sync = sync_blocks;
 		r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
+		r10_bio->state = 0;
 
 		r10_bio->mddev = mddev;
 		atomic_set(&r10_bio->remaining, 0);
@@ -4384,6 +4387,7 @@
 read_more:
 	/* Now schedule reads for blocks from sector_nr to last */
 	r10_bio = mempool_alloc(conf->r10buf_pool, GFP_NOIO);
+	r10_bio->state = 0;
 	raise_barrier(conf, sectors_done != 0);
 	atomic_set(&r10_bio->remaining, 0);
 	r10_bio->mddev = mddev;
@@ -4398,6 +4402,7 @@
 		 * on all the target devices.
 		 */
 		// FIXME
+		mempool_free(r10_bio, conf->r10buf_pool);
 		set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 		return sectors_done;
 	}
@@ -4410,7 +4415,7 @@
 	read_bio->bi_private = r10_bio;
 	read_bio->bi_end_io = end_sync_read;
 	read_bio->bi_rw = READ;
-	read_bio->bi_flags &= ~(BIO_POOL_MASK - 1);
+	read_bio->bi_flags &= (~0UL << BIO_RESET_BITS);
 	read_bio->bi_flags |= 1 << BIO_UPTODATE;
 	read_bio->bi_vcnt = 0;
 	read_bio->bi_iter.bi_size = 0;
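
The flag-mask change makes this open-coded bio reuse match what bio_reset()
does: keep only the bits at or above BIO_RESET_BITS (such as the bvec-pool
index stored in the top of bi_flags) and clear all per-I/O state. For
reference, the same pattern from block/bio.c, paraphrased:

	/* Preserve the non-resettable high bits, wipe the rest,
	 * then mark the recycled bio up to date. */
	unsigned long flags = bio->bi_flags & (~0UL << BIO_RESET_BITS);

	memset(bio, 0, BIO_RESET_BYTES);
	bio->bi_flags = flags | (1 << BIO_UPTODATE);
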
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 6234b2e..183588b 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2922,7 +2922,7 @@
 	      (!test_bit(R5_Insync, &dev->flags) || test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) &&
 	      !test_bit(R5_OVERWRITE, &fdev[0]->flags)) ||
 	     (sh->raid_conf->level == 6 && s->failed && s->to_write &&
-	      s->to_write < sh->raid_conf->raid_disks - 2 &&
+	      s->to_write - s->non_overwrite < sh->raid_conf->raid_disks - 2 &&
 	      (!test_bit(R5_Insync, &dev->flags) || test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))))) {
 		/* we would like to get this block, possibly by computing it,
 		 * otherwise read it if the backing disk is insync
@@ -3817,6 +3817,8 @@
 				set_bit(R5_Wantwrite, &dev->flags);
 				if (prexor)
 					continue;
+				if (s.failed > 1)
+					continue;
 				if (!test_bit(R5_Insync, &dev->flags) ||
 				    ((i == sh->pd_idx || i == sh->qd_idx)  &&
 				     s.failed == 0))
diff --git a/drivers/of/fdt.c b/drivers/of/fdt.c
index f46a24f..79cb831 100644
--- a/drivers/of/fdt.c
+++ b/drivers/of/fdt.c
@@ -453,7 +453,7 @@
 		base = dt_mem_next_cell(dt_root_addr_cells, &prop);
 		size = dt_mem_next_cell(dt_root_size_cells, &prop);
 
-		if (base && size &&
+		if (size &&
 		    early_init_dt_reserve_memory_arch(base, size, nomap) == 0)
 			pr_debug("Reserved memory: reserved region for node '%s': base %pa, size %ld MiB\n",
 				uname, &base, (unsigned long)size / SZ_1M);
diff --git a/drivers/of/irq.c b/drivers/of/irq.c
index 3e06a69..1471e0a 100644
--- a/drivers/of/irq.c
+++ b/drivers/of/irq.c
@@ -301,16 +301,17 @@
 	/* Get the reg property (if any) */
 	addr = of_get_property(device, "reg", NULL);
 
+	/* Try the new-style interrupts-extended first */
+	res = of_parse_phandle_with_args(device, "interrupts-extended",
+					"#interrupt-cells", index, out_irq);
+	if (!res)
+		return of_irq_parse_raw(addr, out_irq);
+
 	/* Get the interrupts property */
 	intspec = of_get_property(device, "interrupts", &intlen);
-	if (intspec == NULL) {
-		/* Try the new-style interrupts-extended */
-		res = of_parse_phandle_with_args(device, "interrupts-extended",
-						"#interrupt-cells", index, out_irq);
-		if (res)
-			return -EINVAL;
-		return of_irq_parse_raw(addr, out_irq);
-	}
+	if (intspec == NULL)
+		return -EINVAL;
+
 	intlen /= sizeof(*intspec);
 
 	pr_debug(" intspec=%d intlen=%d\n", be32_to_cpup(intspec), intlen);
diff --git a/drivers/of/selftest.c b/drivers/of/selftest.c
index d410026..a737cb5 100644
--- a/drivers/of/selftest.c
+++ b/drivers/of/selftest.c
@@ -27,6 +27,7 @@
 #define NO_OF_NODES 2
 static struct device_node *nodes[NO_OF_NODES];
 static int last_node_index;
+static bool selftest_live_tree;
 
 #define selftest(result, fmt, ...) { \
 	if (!(result)) { \
@@ -630,13 +631,6 @@
 {
 	struct device_node *next, *root = np, *dup;
 
-	if (!np) {
-		pr_warn("%s: No tree to attach; not running tests\n",
-			__func__);
-		return -ENODATA;
-	}
-
-
 	/* skip root node */
 	np = np->child;
 	/* storing a copy in temporary node */
@@ -672,12 +666,12 @@
 static int __init selftest_data_add(void)
 {
 	void *selftest_data;
-	struct device_node *selftest_data_node;
+	struct device_node *selftest_data_node, *np;
 	extern uint8_t __dtb_testcases_begin[];
 	extern uint8_t __dtb_testcases_end[];
 	const int size = __dtb_testcases_end - __dtb_testcases_begin;
 
-	if (!size || !of_allnodes) {
+	if (!size) {
 		pr_warn("%s: No testcase data to attach; not running tests\n",
 			__func__);
 		return -ENODATA;
@@ -692,6 +686,22 @@
 		return -ENOMEM;
 	}
 	of_fdt_unflatten_tree(selftest_data, &selftest_data_node);
+	if (!selftest_data_node) {
+		pr_warn("%s: No tree to attach; not running tests\n", __func__);
+		return -ENODATA;
+	}
+
+	if (!of_allnodes) {
+		/* enabling flag for removing nodes */
+		selftest_live_tree = true;
+		of_allnodes = selftest_data_node;
+
+		for_each_of_allnodes(np)
+			__of_attach_node_sysfs(np);
+		of_aliases = of_find_node_by_path("/aliases");
+		of_chosen = of_find_node_by_path("/chosen");
+		return 0;
+	}
 
 	/* attach the sub-tree to live tree */
 	return attach_node_and_children(selftest_data_node);
@@ -723,6 +733,18 @@
 	struct device_node *np;
 	struct property *prop;
 
+	if (selftest_live_tree) {
+		of_node_put(of_aliases);
+		of_node_put(of_chosen);
+		of_aliases = NULL;
+		of_chosen = NULL;
+		for_each_child_of_node(of_allnodes, np)
+			detach_node_and_children(np);
+		__of_detach_node_sysfs(of_allnodes);
+		of_allnodes = NULL;
+		return;
+	}
+
 	while (last_node_index >= 0) {
 		if (nodes[last_node_index]) {
 			np = of_find_node_by_path(nodes[last_node_index]->full_name);
diff --git a/drivers/pci/host/Kconfig b/drivers/pci/host/Kconfig
index 2d8a4d0..8922c37 100644
--- a/drivers/pci/host/Kconfig
+++ b/drivers/pci/host/Kconfig
@@ -1,9 +1,18 @@
 menu "PCI host controller drivers"
 	depends on PCI
 
+config PCI_DRA7XX
+	bool "TI DRA7xx PCIe controller"
+	select PCIE_DW
+	depends on OF && HAS_IOMEM && TI_PIPE3
+	help
+	 Enables support for the PCIe controller in the DRA7xx SoC.  There
+	 are two instances of the PCIe controller in DRA7xx.  The controller
+	 can act as both EP and RC, and reuses the DesignWare core.
+
 config PCI_MVEBU
 	bool "Marvell EBU PCIe controller"
-	depends on ARCH_MVEBU || ARCH_DOVE || ARCH_KIRKWOOD
+	depends on ARCH_MVEBU || ARCH_DOVE
 	depends on OF
 
 config PCIE_DW
diff --git a/drivers/pci/host/Makefile b/drivers/pci/host/Makefile
index 0daec79..d0e88f1 100644
--- a/drivers/pci/host/Makefile
+++ b/drivers/pci/host/Makefile
@@ -1,4 +1,5 @@
 obj-$(CONFIG_PCIE_DW) += pcie-designware.o
+obj-$(CONFIG_PCI_DRA7XX) += pci-dra7xx.o
 obj-$(CONFIG_PCI_EXYNOS) += pci-exynos.o
 obj-$(CONFIG_PCI_IMX6) += pci-imx6.o
 obj-$(CONFIG_PCI_MVEBU) += pci-mvebu.o
diff --git a/drivers/pci/host/pci-dra7xx.c b/drivers/pci/host/pci-dra7xx.c
new file mode 100644
index 0000000..52b34fe
--- /dev/null
+++ b/drivers/pci/host/pci-dra7xx.c
@@ -0,0 +1,458 @@
+/*
+ * pcie-dra7xx - PCIe controller driver for TI DRA7xx SoCs
+ *
+ * Copyright (C) 2013-2014 Texas Instruments Incorporated - http://www.ti.com
+ *
+ * Authors: Kishon Vijay Abraham I <kishon@ti.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/phy/phy.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/resource.h>
+#include <linux/types.h>
+
+#include "pcie-designware.h"
+
+/* PCIe controller wrapper DRA7XX configuration registers */
+
+#define	PCIECTRL_DRA7XX_CONF_IRQSTATUS_MAIN		0x0024
+#define	PCIECTRL_DRA7XX_CONF_IRQENABLE_SET_MAIN		0x0028
+#define	ERR_SYS						BIT(0)
+#define	ERR_FATAL					BIT(1)
+#define	ERR_NONFATAL					BIT(2)
+#define	ERR_COR						BIT(3)
+#define	ERR_AXI						BIT(4)
+#define	ERR_ECRC					BIT(5)
+#define	PME_TURN_OFF					BIT(8)
+#define	PME_TO_ACK					BIT(9)
+#define	PM_PME						BIT(10)
+#define	LINK_REQ_RST					BIT(11)
+#define	LINK_UP_EVT					BIT(12)
+#define	CFG_BME_EVT					BIT(13)
+#define	CFG_MSE_EVT					BIT(14)
+#define	INTERRUPTS (ERR_SYS | ERR_FATAL | ERR_NONFATAL | ERR_COR | ERR_AXI | \
+			ERR_ECRC | PME_TURN_OFF | PME_TO_ACK | PM_PME | \
+			LINK_REQ_RST | LINK_UP_EVT | CFG_BME_EVT | CFG_MSE_EVT)
+
+#define	PCIECTRL_DRA7XX_CONF_IRQSTATUS_MSI		0x0034
+#define	PCIECTRL_DRA7XX_CONF_IRQENABLE_SET_MSI		0x0038
+#define	INTA						BIT(0)
+#define	INTB						BIT(1)
+#define	INTC						BIT(2)
+#define	INTD						BIT(3)
+#define	MSI						BIT(4)
+#define	LEG_EP_INTERRUPTS (INTA | INTB | INTC | INTD)
+
+#define	PCIECTRL_DRA7XX_CONF_DEVICE_CMD			0x0104
+#define	LTSSM_EN					0x1
+
+#define	PCIECTRL_DRA7XX_CONF_PHY_CS			0x010C
+#define	LINK_UP						BIT(16)
+
+struct dra7xx_pcie {
+	void __iomem		*base;
+	struct phy		**phy;
+	int			phy_count;
+	struct device		*dev;
+	struct pcie_port	pp;
+};
+
+#define to_dra7xx_pcie(x)	container_of((x), struct dra7xx_pcie, pp)
+
+static inline u32 dra7xx_pcie_readl(struct dra7xx_pcie *pcie, u32 offset)
+{
+	return readl(pcie->base + offset);
+}
+
+static inline void dra7xx_pcie_writel(struct dra7xx_pcie *pcie, u32 offset,
+				      u32 value)
+{
+	writel(value, pcie->base + offset);
+}
+
+static int dra7xx_pcie_link_up(struct pcie_port *pp)
+{
+	struct dra7xx_pcie *dra7xx = to_dra7xx_pcie(pp);
+	u32 reg = dra7xx_pcie_readl(dra7xx, PCIECTRL_DRA7XX_CONF_PHY_CS);
+
+	return !!(reg & LINK_UP);
+}
+
+static int dra7xx_pcie_establish_link(struct pcie_port *pp)
+{
+	u32 reg;
+	unsigned int retries = 1000;
+	struct dra7xx_pcie *dra7xx = to_dra7xx_pcie(pp);
+
+	if (dw_pcie_link_up(pp)) {
+		dev_err(pp->dev, "link is already up\n");
+		return 0;
+	}
+
+	reg = dra7xx_pcie_readl(dra7xx, PCIECTRL_DRA7XX_CONF_DEVICE_CMD);
+	reg |= LTSSM_EN;
+	dra7xx_pcie_writel(dra7xx, PCIECTRL_DRA7XX_CONF_DEVICE_CMD, reg);
+
+	do {
+		reg = dra7xx_pcie_readl(dra7xx, PCIECTRL_DRA7XX_CONF_PHY_CS);
+		if (reg & LINK_UP)
+			break;
+		usleep_range(10, 20);
+	} while (--retries);
+
+	if (retries == 0) {
+		dev_err(pp->dev, "link is not up\n");
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+static void dra7xx_pcie_enable_interrupts(struct pcie_port *pp)
+{
+	struct dra7xx_pcie *dra7xx = to_dra7xx_pcie(pp);
+
+	dra7xx_pcie_writel(dra7xx, PCIECTRL_DRA7XX_CONF_IRQSTATUS_MAIN,
+			   ~INTERRUPTS);
+	dra7xx_pcie_writel(dra7xx,
+			   PCIECTRL_DRA7XX_CONF_IRQENABLE_SET_MAIN, INTERRUPTS);
+	dra7xx_pcie_writel(dra7xx, PCIECTRL_DRA7XX_CONF_IRQSTATUS_MSI,
+			   ~LEG_EP_INTERRUPTS & ~MSI);
+
+	if (IS_ENABLED(CONFIG_PCI_MSI))
+		dra7xx_pcie_writel(dra7xx,
+				   PCIECTRL_DRA7XX_CONF_IRQENABLE_SET_MSI, MSI);
+	else
+		dra7xx_pcie_writel(dra7xx,
+				   PCIECTRL_DRA7XX_CONF_IRQENABLE_SET_MSI,
+				   LEG_EP_INTERRUPTS);
+}
+
+static void dra7xx_pcie_host_init(struct pcie_port *pp)
+{
+	dw_pcie_setup_rc(pp);
+	dra7xx_pcie_establish_link(pp);
+	if (IS_ENABLED(CONFIG_PCI_MSI))
+		dw_pcie_msi_init(pp);
+	dra7xx_pcie_enable_interrupts(pp);
+}
+
+static struct pcie_host_ops dra7xx_pcie_host_ops = {
+	.link_up = dra7xx_pcie_link_up,
+	.host_init = dra7xx_pcie_host_init,
+};
+
+static int dra7xx_pcie_intx_map(struct irq_domain *domain, unsigned int irq,
+				irq_hw_number_t hwirq)
+{
+	irq_set_chip_and_handler(irq, &dummy_irq_chip, handle_simple_irq);
+	irq_set_chip_data(irq, domain->host_data);
+	set_irq_flags(irq, IRQF_VALID);
+
+	return 0;
+}
+
+static const struct irq_domain_ops intx_domain_ops = {
+	.map = dra7xx_pcie_intx_map,
+};
+
+static int dra7xx_pcie_init_irq_domain(struct pcie_port *pp)
+{
+	struct device *dev = pp->dev;
+	struct device_node *node = dev->of_node;
+	struct device_node *pcie_intc_node =  of_get_next_child(node, NULL);
+
+	if (!pcie_intc_node) {
+		dev_err(dev, "No PCIe Intc node found\n");
+		return -ENODEV;
+	}
+
+	pp->irq_domain = irq_domain_add_linear(pcie_intc_node, 4,
+					       &intx_domain_ops, pp);
+	if (!pp->irq_domain) {
+		dev_err(dev, "Failed to get an INTx IRQ domain\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+static irqreturn_t dra7xx_pcie_msi_irq_handler(int irq, void *arg)
+{
+	struct pcie_port *pp = arg;
+	struct dra7xx_pcie *dra7xx = to_dra7xx_pcie(pp);
+	u32 reg;
+
+	reg = dra7xx_pcie_readl(dra7xx, PCIECTRL_DRA7XX_CONF_IRQSTATUS_MSI);
+
+	switch (reg) {
+	case MSI:
+		dw_handle_msi_irq(pp);
+		break;
+	case INTA:
+	case INTB:
+	case INTC:
+	case INTD:
+		generic_handle_irq(irq_find_mapping(pp->irq_domain, ffs(reg)));
+		break;
+	}
+
+	dra7xx_pcie_writel(dra7xx, PCIECTRL_DRA7XX_CONF_IRQSTATUS_MSI, reg);
+
+	return IRQ_HANDLED;
+}
+
+
+static irqreturn_t dra7xx_pcie_irq_handler(int irq, void *arg)
+{
+	struct dra7xx_pcie *dra7xx = arg;
+	u32 reg;
+
+	reg = dra7xx_pcie_readl(dra7xx, PCIECTRL_DRA7XX_CONF_IRQSTATUS_MAIN);
+
+	if (reg & ERR_SYS)
+		dev_dbg(dra7xx->dev, "System Error\n");
+
+	if (reg & ERR_FATAL)
+		dev_dbg(dra7xx->dev, "Fatal Error\n");
+
+	if (reg & ERR_NONFATAL)
+		dev_dbg(dra7xx->dev, "Non Fatal Error\n");
+
+	if (reg & ERR_COR)
+		dev_dbg(dra7xx->dev, "Correctable Error\n");
+
+	if (reg & ERR_AXI)
+		dev_dbg(dra7xx->dev, "AXI tag lookup fatal Error\n");
+
+	if (reg & ERR_ECRC)
+		dev_dbg(dra7xx->dev, "ECRC Error\n");
+
+	if (reg & PME_TURN_OFF)
+		dev_dbg(dra7xx->dev,
+			"Power Management Event Turn-Off message received\n");
+
+	if (reg & PME_TO_ACK)
+		dev_dbg(dra7xx->dev,
+			"Power Management Turn-Off Ack message received\n");
+
+	if (reg & PM_PME)
+		dev_dbg(dra7xx->dev,
+			"PM Power Management Event message received\n");
+
+	if (reg & LINK_REQ_RST)
+		dev_dbg(dra7xx->dev, "Link Request Reset\n");
+
+	if (reg & LINK_UP_EVT)
+		dev_dbg(dra7xx->dev, "Link-up state change\n");
+
+	if (reg & CFG_BME_EVT)
+		dev_dbg(dra7xx->dev, "CFG 'Bus Master Enable' change\n");
+
+	if (reg & CFG_MSE_EVT)
+		dev_dbg(dra7xx->dev, "CFG 'Memory Space Enable' change\n");
+
+	dra7xx_pcie_writel(dra7xx, PCIECTRL_DRA7XX_CONF_IRQSTATUS_MAIN, reg);
+
+	return IRQ_HANDLED;
+}
+
+static int add_pcie_port(struct dra7xx_pcie *dra7xx,
+			  struct platform_device *pdev)
+{
+	int ret;
+	struct pcie_port *pp;
+	struct resource *res;
+	struct device *dev = &pdev->dev;
+
+	pp = &dra7xx->pp;
+	pp->dev = dev;
+	pp->ops = &dra7xx_pcie_host_ops;
+
+	pp->irq = platform_get_irq(pdev, 1);
+	if (pp->irq < 0) {
+		dev_err(dev, "missing IRQ resource\n");
+		return -EINVAL;
+	}
+
+	ret = devm_request_irq(&pdev->dev, pp->irq,
+			       dra7xx_pcie_msi_irq_handler, IRQF_SHARED,
+			       "dra7-pcie-msi",	pp);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to request irq\n");
+		return ret;
+	}
+
+	if (!IS_ENABLED(CONFIG_PCI_MSI)) {
+		ret = dra7xx_pcie_init_irq_domain(pp);
+		if (ret < 0)
+			return ret;
+	}
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "rc_dbics");
+	pp->dbi_base = devm_ioremap(dev, res->start, resource_size(res));
+	if (!pp->dbi_base)
+		return -ENOMEM;
+
+	ret = dw_pcie_host_init(pp);
+	if (ret) {
+		dev_err(dra7xx->dev, "failed to initialize host\n");
+		return ret;
+	}
+
+	return 0;
+}
+
+static int __init dra7xx_pcie_probe(struct platform_device *pdev)
+{
+	u32 reg;
+	int ret;
+	int irq;
+	int i;
+	int phy_count;
+	struct phy **phy;
+	void __iomem *base;
+	struct resource *res;
+	struct dra7xx_pcie *dra7xx;
+	struct device *dev = &pdev->dev;
+	struct device_node *np = dev->of_node;
+	char name[10];
+
+	dra7xx = devm_kzalloc(dev, sizeof(*dra7xx), GFP_KERNEL);
+	if (!dra7xx)
+		return -ENOMEM;
+
+	irq = platform_get_irq(pdev, 0);
+	if (irq < 0) {
+		dev_err(dev, "missing IRQ resource\n");
+		return -EINVAL;
+	}
+
+	ret = devm_request_irq(dev, irq, dra7xx_pcie_irq_handler,
+			       IRQF_SHARED, "dra7xx-pcie-main", dra7xx);
+	if (ret) {
+		dev_err(dev, "failed to request irq\n");
+		return ret;
+	}
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "ti_conf");
+	base = devm_ioremap_nocache(dev, res->start, resource_size(res));
+	if (!base)
+		return -ENOMEM;
+
+	phy_count = of_property_count_strings(np, "phy-names");
+	if (phy_count < 0) {
+		dev_err(dev, "unable to find the strings\n");
+		return phy_count;
+	}
+
+	phy = devm_kzalloc(dev, sizeof(*phy) * phy_count, GFP_KERNEL);
+	if (!phy)
+		return -ENOMEM;
+
+	for (i = 0; i < phy_count; i++) {
+		snprintf(name, sizeof(name), "pcie-phy%d", i);
+		phy[i] = devm_phy_get(dev, name);
+		if (IS_ERR(phy[i]))
+			return PTR_ERR(phy[i]);
+
+		ret = phy_init(phy[i]);
+		if (ret < 0)
+			goto err_phy;
+
+		ret = phy_power_on(phy[i]);
+		if (ret < 0) {
+			phy_exit(phy[i]);
+			goto err_phy;
+		}
+	}
+
+	dra7xx->base = base;
+	dra7xx->phy = phy;
+	dra7xx->dev = dev;
+	dra7xx->phy_count = phy_count;
+
+	pm_runtime_enable(dev);
+	ret = pm_runtime_get_sync(dev);
+	if (IS_ERR_VALUE(ret)) {
+		dev_err(dev, "pm_runtime_get_sync failed\n");
+		goto err_phy;
+	}
+
+	reg = dra7xx_pcie_readl(dra7xx, PCIECTRL_DRA7XX_CONF_DEVICE_CMD);
+	reg &= ~LTSSM_EN;
+	dra7xx_pcie_writel(dra7xx, PCIECTRL_DRA7XX_CONF_DEVICE_CMD, reg);
+
+	platform_set_drvdata(pdev, dra7xx);
+
+	ret = add_pcie_port(dra7xx, pdev);
+	if (ret < 0)
+		goto err_add_port;
+
+	return 0;
+
+err_add_port:
+	pm_runtime_put(dev);
+	pm_runtime_disable(dev);
+
+err_phy:
+	while (--i >= 0) {
+		phy_power_off(phy[i]);
+		phy_exit(phy[i]);
+	}
+
+	return ret;
+}
+
+static int __exit dra7xx_pcie_remove(struct platform_device *pdev)
+{
+	struct dra7xx_pcie *dra7xx = platform_get_drvdata(pdev);
+	struct pcie_port *pp = &dra7xx->pp;
+	struct device *dev = &pdev->dev;
+	int count = dra7xx->phy_count;
+
+	if (pp->irq_domain)
+		irq_domain_remove(pp->irq_domain);
+	pm_runtime_put(dev);
+	pm_runtime_disable(dev);
+	while (count--) {
+		phy_power_off(dra7xx->phy[count]);
+		phy_exit(dra7xx->phy[count]);
+	}
+
+	return 0;
+}
+
+static const struct of_device_id of_dra7xx_pcie_match[] = {
+	{ .compatible = "ti,dra7-pcie", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, of_dra7xx_pcie_match);
+
+static struct platform_driver dra7xx_pcie_driver = {
+	.remove		= __exit_p(dra7xx_pcie_remove),
+	.driver = {
+		.name	= "dra7-pcie",
+		.owner	= THIS_MODULE,
+		.of_match_table = of_dra7xx_pcie_match,
+	},
+};
+
+module_platform_driver_probe(dra7xx_pcie_driver, dra7xx_pcie_probe);
+
+MODULE_AUTHOR("Kishon Vijay Abraham I <kishon@ti.com>");
+MODULE_DESCRIPTION("TI PCIe controller driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/pci/host/pci-tegra.c b/drivers/pci/host/pci-tegra.c
index abd6578..0fb0fdb 100644
--- a/drivers/pci/host/pci-tegra.c
+++ b/drivers/pci/host/pci-tegra.c
@@ -25,6 +25,7 @@
  */
 
 #include <linux/clk.h>
+#include <linux/debugfs.h>
 #include <linux/delay.h>
 #include <linux/export.h>
 #include <linux/interrupt.h>
@@ -276,6 +277,7 @@
 	unsigned int num_supplies;
 
 	const struct tegra_pcie_soc_data *soc_data;
+	struct dentry *debugfs;
 };
 
 struct tegra_pcie_port {
@@ -1739,6 +1741,115 @@
 };
 MODULE_DEVICE_TABLE(of, tegra_pcie_of_match);
 
+static void *tegra_pcie_ports_seq_start(struct seq_file *s, loff_t *pos)
+{
+	struct tegra_pcie *pcie = s->private;
+
+	if (list_empty(&pcie->ports))
+		return NULL;
+
+	seq_printf(s, "Index  Status\n");
+
+	return seq_list_start(&pcie->ports, *pos);
+}
+
+static void *tegra_pcie_ports_seq_next(struct seq_file *s, void *v, loff_t *pos)
+{
+	struct tegra_pcie *pcie = s->private;
+
+	return seq_list_next(v, &pcie->ports, pos);
+}
+
+static void tegra_pcie_ports_seq_stop(struct seq_file *s, void *v)
+{
+}
+
+static int tegra_pcie_ports_seq_show(struct seq_file *s, void *v)
+{
+	bool up = false, active = false;
+	struct tegra_pcie_port *port;
+	unsigned int value;
+
+	port = list_entry(v, struct tegra_pcie_port, list);
+
+	value = readl(port->base + RP_VEND_XP);
+
+	if (value & RP_VEND_XP_DL_UP)
+		up = true;
+
+	value = readl(port->base + RP_LINK_CONTROL_STATUS);
+
+	if (value & RP_LINK_CONTROL_STATUS_DL_LINK_ACTIVE)
+		active = true;
+
+	seq_printf(s, "%2u     ", port->index);
+
+	if (up)
+		seq_printf(s, "up");
+
+	if (active) {
+		if (up)
+			seq_printf(s, ", ");
+
+		seq_printf(s, "active");
+	}
+
+	seq_printf(s, "\n");
+	return 0;
+}
+
+static const struct seq_operations tegra_pcie_ports_seq_ops = {
+	.start = tegra_pcie_ports_seq_start,
+	.next = tegra_pcie_ports_seq_next,
+	.stop = tegra_pcie_ports_seq_stop,
+	.show = tegra_pcie_ports_seq_show,
+};
+
+static int tegra_pcie_ports_open(struct inode *inode, struct file *file)
+{
+	struct tegra_pcie *pcie = inode->i_private;
+	struct seq_file *s;
+	int err;
+
+	err = seq_open(file, &tegra_pcie_ports_seq_ops);
+	if (err)
+		return err;
+
+	s = file->private_data;
+	s->private = pcie;
+
+	return 0;
+}
+
+static const struct file_operations tegra_pcie_ports_ops = {
+	.owner = THIS_MODULE,
+	.open = tegra_pcie_ports_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release,
+};
+
+static int tegra_pcie_debugfs_init(struct tegra_pcie *pcie)
+{
+	struct dentry *file;
+
+	pcie->debugfs = debugfs_create_dir("pcie", NULL);
+	if (!pcie->debugfs)
+		return -ENOMEM;
+
+	file = debugfs_create_file("ports", S_IFREG | S_IRUGO, pcie->debugfs,
+				   pcie, &tegra_pcie_ports_ops);
+	if (!file)
+		goto remove;
+
+	return 0;
+
+remove:
+	debugfs_remove_recursive(pcie->debugfs);
+	pcie->debugfs = NULL;
+	return -ENOMEM;
+}
+
 static int tegra_pcie_probe(struct platform_device *pdev)
 {
 	const struct of_device_id *match;
@@ -1793,6 +1904,13 @@
 		goto disable_msi;
 	}
 
+	if (IS_ENABLED(CONFIG_DEBUG_FS)) {
+		err = tegra_pcie_debugfs_init(pcie);
+		if (err < 0)
+			dev_err(&pdev->dev, "failed to setup debugfs: %d\n",
+				err);
+	}
+
 	platform_set_drvdata(pdev, pcie);
 	return 0;
 
diff --git a/drivers/pci/host/pcie-designware.c b/drivers/pci/host/pcie-designware.c
index 1eaf4df..52bd3a1 100644
--- a/drivers/pci/host/pcie-designware.c
+++ b/drivers/pci/host/pcie-designware.c
@@ -20,6 +20,7 @@
 #include <linux/of_pci.h>
 #include <linux/pci.h>
 #include <linux/pci_regs.h>
+#include <linux/platform_device.h>
 #include <linux/types.h>
 
 #include "pcie-designware.h"
@@ -217,27 +218,47 @@
 	return 0;
 }
 
+static void dw_pcie_msi_clear_irq(struct pcie_port *pp, int irq)
+{
+	unsigned int res, bit, val;
+
+	res = (irq / 32) * 12;
+	bit = irq % 32;
+	dw_pcie_rd_own_conf(pp, PCIE_MSI_INTR0_ENABLE + res, 4, &val);
+	val &= ~(1 << bit);
+	dw_pcie_wr_own_conf(pp, PCIE_MSI_INTR0_ENABLE + res, 4, val);
+}
+
 static void clear_irq_range(struct pcie_port *pp, unsigned int irq_base,
 			    unsigned int nvec, unsigned int pos)
 {
-	unsigned int i, res, bit, val;
+	unsigned int i;
 
 	for (i = 0; i < nvec; i++) {
 		irq_set_msi_desc_off(irq_base, i, NULL);
 		clear_bit(pos + i, pp->msi_irq_in_use);
 		/* Disable corresponding interrupt on MSI controller */
-		res = ((pos + i) / 32) * 12;
-		bit = (pos + i) % 32;
-		dw_pcie_rd_own_conf(pp, PCIE_MSI_INTR0_ENABLE + res, 4, &val);
-		val &= ~(1 << bit);
-		dw_pcie_wr_own_conf(pp, PCIE_MSI_INTR0_ENABLE + res, 4, val);
+		if (pp->ops->msi_clear_irq)
+			pp->ops->msi_clear_irq(pp, pos + i);
+		else
+			dw_pcie_msi_clear_irq(pp, pos + i);
 	}
 }
 
+static void dw_pcie_msi_set_irq(struct pcie_port *pp, int irq)
+{
+	unsigned int res, bit, val;
+
+	res = (irq / 32) * 12;
+	bit = irq % 32;
+	dw_pcie_rd_own_conf(pp, PCIE_MSI_INTR0_ENABLE + res, 4, &val);
+	val |= 1 << bit;
+	dw_pcie_wr_own_conf(pp, PCIE_MSI_INTR0_ENABLE + res, 4, val);
+}
+
 static int assign_irq(int no_irqs, struct msi_desc *desc, int *pos)
 {
-	int res, bit, irq, pos0, pos1, i;
-	u32 val;
+	int irq, pos0, pos1, i;
 	struct pcie_port *pp = sys_to_pcie(desc->dev->bus->sysdata);
 
 	if (!pp) {
@@ -281,11 +302,10 @@
 		}
 		set_bit(pos0 + i, pp->msi_irq_in_use);
 		/*Enable corresponding interrupt in MSI interrupt controller */
-		res = ((pos0 + i) / 32) * 12;
-		bit = (pos0 + i) % 32;
-		dw_pcie_rd_own_conf(pp, PCIE_MSI_INTR0_ENABLE + res, 4, &val);
-		val |= 1 << bit;
-		dw_pcie_wr_own_conf(pp, PCIE_MSI_INTR0_ENABLE + res, 4, val);
+		if (pp->ops->msi_set_irq)
+			pp->ops->msi_set_irq(pp, pos0 + i);
+		else
+			dw_pcie_msi_set_irq(pp, pos0 + i);
 	}
 
 	*pos = pos0;
@@ -353,7 +373,10 @@
 	 */
 	desc->msi_attrib.multiple = msgvec;
 
-	msg.address_lo = virt_to_phys((void *)pp->msi_data);
+	if (pp->ops->get_msi_data)
+		msg.address_lo = pp->ops->get_msi_data(pp);
+	else
+		msg.address_lo = virt_to_phys((void *)pp->msi_data);
 	msg.address_hi = 0x0;
 	msg.data = pos;
 	write_msi_msg(irq, &msg);
@@ -396,10 +419,35 @@
 int __init dw_pcie_host_init(struct pcie_port *pp)
 {
 	struct device_node *np = pp->dev->of_node;
+	struct platform_device *pdev = to_platform_device(pp->dev);
 	struct of_pci_range range;
 	struct of_pci_range_parser parser;
-	u32 val;
-	int i;
+	struct resource *cfg_res;
+	u32 val, na, ns;
+	const __be32 *addrp;
+	int i, index;
+
+	/* Find the address cell size and the number of cells in order to get
+	 * the untranslated address.
+	 */
+	of_property_read_u32(np, "#address-cells", &na);
+	ns = of_n_size_cells(np);
+
+	cfg_res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "config");
+	if (cfg_res) {
+		pp->config.cfg0_size = resource_size(cfg_res)/2;
+		pp->config.cfg1_size = resource_size(cfg_res)/2;
+		pp->cfg0_base = cfg_res->start;
+		pp->cfg1_base = cfg_res->start + pp->config.cfg0_size;
+
+		/* Find the untranslated configuration space address */
+		index = of_property_match_string(np, "reg-names", "config");
+		addrp = of_get_address(np, index, false, false);
+		pp->cfg0_mod_base = of_read_number(addrp, ns);
+		pp->cfg1_mod_base = pp->cfg0_mod_base + pp->config.cfg0_size;
+	} else {
+		dev_err(pp->dev, "missing *config* reg space\n");
+	}
 
 	if (of_pci_range_parser_init(&parser, np)) {
 		dev_err(pp->dev, "missing ranges property\n");
@@ -422,17 +470,33 @@
 			pp->config.io_size = resource_size(&pp->io);
 			pp->config.io_bus_addr = range.pci_addr;
 			pp->io_base = range.cpu_addr;
+
+			/* Find the untranslated IO space address */
+			pp->io_mod_base = of_read_number(parser.range -
+							 parser.np + na, ns);
 		}
 		if (restype == IORESOURCE_MEM) {
 			of_pci_range_to_resource(&range, np, &pp->mem);
 			pp->mem.name = "MEM";
 			pp->config.mem_size = resource_size(&pp->mem);
 			pp->config.mem_bus_addr = range.pci_addr;
+
+			/* Find the untranslated MEM space address */
+			pp->mem_mod_base = of_read_number(parser.range -
+							  parser.np + na, ns);
 		}
 		if (restype == 0) {
 			of_pci_range_to_resource(&range, np, &pp->cfg);
 			pp->config.cfg0_size = resource_size(&pp->cfg)/2;
 			pp->config.cfg1_size = resource_size(&pp->cfg)/2;
+			pp->cfg0_base = pp->cfg.start;
+			pp->cfg1_base = pp->cfg.start + pp->config.cfg0_size;
+
+			/* Find the untranslated configuration space address */
+			pp->cfg0_mod_base = of_read_number(parser.range -
+							   parser.np + na, ns);
+			pp->cfg1_mod_base = pp->cfg0_mod_base +
+					    pp->config.cfg0_size;
 		}
 	}
 
@@ -445,8 +509,6 @@
 		}
 	}
 
-	pp->cfg0_base = pp->cfg.start;
-	pp->cfg1_base = pp->cfg.start + pp->config.cfg0_size;
 	pp->mem_base = pp->mem.start;
 
 	pp->va_cfg0_base = devm_ioremap(pp->dev, pp->cfg0_base,
@@ -509,9 +571,9 @@
 	/* Program viewport 0 : OUTBOUND : CFG0 */
 	dw_pcie_writel_rc(pp, PCIE_ATU_REGION_OUTBOUND | PCIE_ATU_REGION_INDEX0,
 			  PCIE_ATU_VIEWPORT);
-	dw_pcie_writel_rc(pp, pp->cfg0_base, PCIE_ATU_LOWER_BASE);
-	dw_pcie_writel_rc(pp, (pp->cfg0_base >> 32), PCIE_ATU_UPPER_BASE);
-	dw_pcie_writel_rc(pp, pp->cfg0_base + pp->config.cfg0_size - 1,
+	dw_pcie_writel_rc(pp, pp->cfg0_mod_base, PCIE_ATU_LOWER_BASE);
+	dw_pcie_writel_rc(pp, (pp->cfg0_mod_base >> 32), PCIE_ATU_UPPER_BASE);
+	dw_pcie_writel_rc(pp, pp->cfg0_mod_base + pp->config.cfg0_size - 1,
 			  PCIE_ATU_LIMIT);
 	dw_pcie_writel_rc(pp, busdev, PCIE_ATU_LOWER_TARGET);
 	dw_pcie_writel_rc(pp, 0, PCIE_ATU_UPPER_TARGET);
@@ -525,9 +587,9 @@
 	dw_pcie_writel_rc(pp, PCIE_ATU_REGION_OUTBOUND | PCIE_ATU_REGION_INDEX1,
 			  PCIE_ATU_VIEWPORT);
 	dw_pcie_writel_rc(pp, PCIE_ATU_TYPE_CFG1, PCIE_ATU_CR1);
-	dw_pcie_writel_rc(pp, pp->cfg1_base, PCIE_ATU_LOWER_BASE);
-	dw_pcie_writel_rc(pp, (pp->cfg1_base >> 32), PCIE_ATU_UPPER_BASE);
-	dw_pcie_writel_rc(pp, pp->cfg1_base + pp->config.cfg1_size - 1,
+	dw_pcie_writel_rc(pp, pp->cfg1_mod_base, PCIE_ATU_LOWER_BASE);
+	dw_pcie_writel_rc(pp, (pp->cfg1_mod_base >> 32), PCIE_ATU_UPPER_BASE);
+	dw_pcie_writel_rc(pp, pp->cfg1_mod_base + pp->config.cfg1_size - 1,
 			  PCIE_ATU_LIMIT);
 	dw_pcie_writel_rc(pp, busdev, PCIE_ATU_LOWER_TARGET);
 	dw_pcie_writel_rc(pp, 0, PCIE_ATU_UPPER_TARGET);
@@ -540,9 +602,9 @@
 	dw_pcie_writel_rc(pp, PCIE_ATU_REGION_OUTBOUND | PCIE_ATU_REGION_INDEX0,
 			  PCIE_ATU_VIEWPORT);
 	dw_pcie_writel_rc(pp, PCIE_ATU_TYPE_MEM, PCIE_ATU_CR1);
-	dw_pcie_writel_rc(pp, pp->mem_base, PCIE_ATU_LOWER_BASE);
-	dw_pcie_writel_rc(pp, (pp->mem_base >> 32), PCIE_ATU_UPPER_BASE);
-	dw_pcie_writel_rc(pp, pp->mem_base + pp->config.mem_size - 1,
+	dw_pcie_writel_rc(pp, pp->mem_mod_base, PCIE_ATU_LOWER_BASE);
+	dw_pcie_writel_rc(pp, (pp->mem_mod_base >> 32), PCIE_ATU_UPPER_BASE);
+	dw_pcie_writel_rc(pp, pp->mem_mod_base + pp->config.mem_size - 1,
 			  PCIE_ATU_LIMIT);
 	dw_pcie_writel_rc(pp, pp->config.mem_bus_addr, PCIE_ATU_LOWER_TARGET);
 	dw_pcie_writel_rc(pp, upper_32_bits(pp->config.mem_bus_addr),
@@ -556,9 +618,9 @@
 	dw_pcie_writel_rc(pp, PCIE_ATU_REGION_OUTBOUND | PCIE_ATU_REGION_INDEX1,
 			  PCIE_ATU_VIEWPORT);
 	dw_pcie_writel_rc(pp, PCIE_ATU_TYPE_IO, PCIE_ATU_CR1);
-	dw_pcie_writel_rc(pp, pp->io_base, PCIE_ATU_LOWER_BASE);
-	dw_pcie_writel_rc(pp, (pp->io_base >> 32), PCIE_ATU_UPPER_BASE);
-	dw_pcie_writel_rc(pp, pp->io_base + pp->config.io_size - 1,
+	dw_pcie_writel_rc(pp, pp->io_mod_base, PCIE_ATU_LOWER_BASE);
+	dw_pcie_writel_rc(pp, (pp->io_mod_base >> 32), PCIE_ATU_UPPER_BASE);
+	dw_pcie_writel_rc(pp, pp->io_mod_base + pp->config.io_size - 1,
 			  PCIE_ATU_LIMIT);
 	dw_pcie_writel_rc(pp, pp->config.io_bus_addr, PCIE_ATU_LOWER_TARGET);
 	dw_pcie_writel_rc(pp, upper_32_bits(pp->config.io_bus_addr),
@@ -656,7 +718,11 @@
 	}
 
 	if (bus->number != pp->root_bus_nr)
-		ret = dw_pcie_rd_other_conf(pp, bus, devfn,
+		if (pp->ops->rd_other_conf)
+			ret = pp->ops->rd_other_conf(pp, bus, devfn,
+						where, size, val);
+		else
+			ret = dw_pcie_rd_other_conf(pp, bus, devfn,
 						where, size, val);
 	else
 		ret = dw_pcie_rd_own_conf(pp, where, size, val);
@@ -679,7 +745,11 @@
 		return PCIBIOS_DEVICE_NOT_FOUND;
 
 	if (bus->number != pp->root_bus_nr)
-		ret = dw_pcie_wr_other_conf(pp, bus, devfn,
+		if (pp->ops->wr_other_conf)
+			ret = pp->ops->wr_other_conf(pp, bus, devfn,
+						where, size, val);
+		else
+			ret = dw_pcie_wr_other_conf(pp, bus, devfn,
 						where, size, val);
 	else
 		ret = dw_pcie_wr_own_conf(pp, where, size, val);
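
The new pcie_host_ops callbacks let glue drivers override the DesignWare
defaults for MSI bookkeeping and config-space access; any hook left NULL
falls back to the dw_pcie_* implementation, as the dispatch code above
shows. A hypothetical glue driver would plug in along these lines (the
my_pcie_* names and MY_MSI_DOORBELL are placeholders, not real API):

	static u32 my_pcie_get_msi_data(struct pcie_port *pp)
	{
		/* assumption: device-specific MSI doorbell address */
		return MY_MSI_DOORBELL;
	}

	static struct pcie_host_ops my_pcie_host_ops = {
		.link_up	= my_pcie_link_up,
		.host_init	= my_pcie_host_init,
		.get_msi_data	= my_pcie_get_msi_data,	/* optional */
	};
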
diff --git a/drivers/pci/host/pcie-designware.h b/drivers/pci/host/pcie-designware.h
index 77f592f..daf81f9 100644
--- a/drivers/pci/host/pcie-designware.h
+++ b/drivers/pci/host/pcie-designware.h
@@ -36,11 +36,15 @@
 	u8			root_bus_nr;
 	void __iomem		*dbi_base;
 	u64			cfg0_base;
+	u64			cfg0_mod_base;
 	void __iomem		*va_cfg0_base;
 	u64			cfg1_base;
+	u64			cfg1_mod_base;
 	void __iomem		*va_cfg1_base;
 	u64			io_base;
+	u64			io_mod_base;
 	u64			mem_base;
+	u64			mem_mod_base;
 	struct resource		cfg;
 	struct resource		io;
 	struct resource		mem;
@@ -61,8 +65,15 @@
 			u32 val, void __iomem *dbi_base);
 	int (*rd_own_conf)(struct pcie_port *pp, int where, int size, u32 *val);
 	int (*wr_own_conf)(struct pcie_port *pp, int where, int size, u32 val);
+	int (*rd_other_conf)(struct pcie_port *pp, struct pci_bus *bus,
+			unsigned int devfn, int where, int size, u32 *val);
+	int (*wr_other_conf)(struct pcie_port *pp, struct pci_bus *bus,
+			unsigned int devfn, int where, int size, u32 val);
 	int (*link_up)(struct pcie_port *pp);
 	void (*host_init)(struct pcie_port *pp);
+	void (*msi_set_irq)(struct pcie_port *pp, int irq);
+	void (*msi_clear_irq)(struct pcie_port *pp, int irq);
+	u32 (*get_msi_data)(struct pcie_port *pp);
 };
 
 int dw_pcie_cfg_read(void __iomem *addr, int where, int size, u32 *val);
diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index df33060..d81f3cc 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -377,6 +377,10 @@
 		pool->slab_flags |= SLAB_CACHE_DMA;
 		pool->gfp_mask = __GFP_DMA;
 	}
+
+	if (hostt->cmd_size)
+		hostt->cmd_pool = pool;
+
 	return pool;
 }
 
@@ -421,8 +425,10 @@
 out_free_slab:
 	kmem_cache_destroy(pool->cmd_slab);
 out_free_pool:
-	if (hostt->cmd_size)
+	if (hostt->cmd_size) {
 		scsi_free_host_cmd_pool(pool);
+		hostt->cmd_pool = NULL;
+	}
 	goto out;
 }
 
@@ -444,8 +450,10 @@
 	if (!--pool->users) {
 		kmem_cache_destroy(pool->cmd_slab);
 		kmem_cache_destroy(pool->sense_slab);
-		if (hostt->cmd_size)
+		if (hostt->cmd_size) {
 			scsi_free_host_cmd_pool(pool);
+			hostt->cmd_pool = NULL;
+		}
 	}
 	mutex_unlock(&host_cmd_pool_mutex);
 }
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 9c44392..ce62e87 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1774,7 +1774,7 @@
 	blk_requeue_request(q, req);
 	atomic_dec(&sdev->device_busy);
 out_delay:
-	if (atomic_read(&sdev->device_busy) && !scsi_device_blocked(sdev))
+	if (!atomic_read(&sdev->device_busy) && !scsi_device_blocked(sdev))
 		blk_delay_queue(q, SCSI_QUEUE_DELAY);
 }
 
diff --git a/fs/aio.c b/fs/aio.c
index bd7ec2c..ae63587 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -192,7 +192,6 @@
 	}
 
 	file->f_flags = O_RDWR;
-	file->private_data = ctx;
 	return file;
 }
 
@@ -202,7 +201,7 @@
 	static const struct dentry_operations ops = {
 		.d_dname	= simple_dname,
 	};
-	return mount_pseudo(fs_type, "aio:", NULL, &ops, 0xa10a10a1);
+	return mount_pseudo(fs_type, "aio:", NULL, &ops, AIO_RING_MAGIC);
 }
 
 /* aio_setup
@@ -556,8 +555,7 @@
 	struct aio_ring *ring;
 
 	spin_lock(&mm->ioctx_lock);
-	rcu_read_lock();
-	table = rcu_dereference(mm->ioctx_table);
+	table = rcu_dereference_raw(mm->ioctx_table);
 
 	while (1) {
 		if (table)
@@ -565,7 +563,6 @@
 				if (!table->table[i]) {
 					ctx->id = i;
 					table->table[i] = ctx;
-					rcu_read_unlock();
 					spin_unlock(&mm->ioctx_lock);
 
 					/* While kioctx setup is in progress,
@@ -579,8 +576,6 @@
 				}
 
 		new_nr = (table ? table->nr : 1) * 4;
-
-		rcu_read_unlock();
 		spin_unlock(&mm->ioctx_lock);
 
 		table = kzalloc(sizeof(*table) + sizeof(struct kioctx *) *
@@ -591,8 +586,7 @@
 		table->nr = new_nr;
 
 		spin_lock(&mm->ioctx_lock);
-		rcu_read_lock();
-		old = rcu_dereference(mm->ioctx_table);
+		old = rcu_dereference_raw(mm->ioctx_table);
 
 		if (!old) {
 			rcu_assign_pointer(mm->ioctx_table, table);
@@ -739,12 +733,9 @@
 
 
 	spin_lock(&mm->ioctx_lock);
-	rcu_read_lock();
-	table = rcu_dereference(mm->ioctx_table);
-
+	table = rcu_dereference_raw(mm->ioctx_table);
 	WARN_ON(ctx != table->table[ctx->id]);
 	table->table[ctx->id] = NULL;
-	rcu_read_unlock();
 	spin_unlock(&mm->ioctx_lock);
 
 	/* percpu_ref_kill() will do the necessary call_rcu() */
@@ -793,40 +784,30 @@
  */
 void exit_aio(struct mm_struct *mm)
 {
-	struct kioctx_table *table;
-	struct kioctx *ctx;
-	unsigned i = 0;
+	struct kioctx_table *table = rcu_dereference_raw(mm->ioctx_table);
+	int i;
 
-	while (1) {
-		rcu_read_lock();
-		table = rcu_dereference(mm->ioctx_table);
+	if (!table)
+		return;
 
-		do {
-			if (!table || i >= table->nr) {
-				rcu_read_unlock();
-				rcu_assign_pointer(mm->ioctx_table, NULL);
-				if (table)
-					kfree(table);
-				return;
-			}
+	for (i = 0; i < table->nr; ++i) {
+		struct kioctx *ctx = table->table[i];
 
-			ctx = table->table[i++];
-		} while (!ctx);
-
-		rcu_read_unlock();
-
+		if (!ctx)
+			continue;
 		/*
-		 * We don't need to bother with munmap() here -
-		 * exit_mmap(mm) is coming and it'll unmap everything.
-		 * Since aio_free_ring() uses non-zero ->mmap_size
-		 * as indicator that it needs to unmap the area,
-		 * just set it to 0; aio_free_ring() is the only
-		 * place that uses ->mmap_size, so it's safe.
+		 * We don't need to bother with munmap() here - exit_mmap(mm)
+		 * is coming and it'll unmap everything. And we simply can't,
+		 * this is not necessarily our ->mm.
+		 * Since kill_ioctx() uses non-zero ->mmap_size as indicator
+		 * that it needs to unmap the area, just set it to 0.
 		 */
 		ctx->mmap_size = 0;
-
 		kill_ioctx(mm, ctx, NULL);
 	}
+
+	RCU_INIT_POINTER(mm->ioctx_table, NULL);
+	kfree(table);
 }
 
 static void put_reqs_available(struct kioctx *ctx, unsigned nr)
@@ -834,10 +815,8 @@
 	struct kioctx_cpu *kcpu;
 	unsigned long flags;
 
-	preempt_disable();
-	kcpu = this_cpu_ptr(ctx->cpu);
-
 	local_irq_save(flags);
+	kcpu = this_cpu_ptr(ctx->cpu);
 	kcpu->reqs_available += nr;
 
 	while (kcpu->reqs_available >= ctx->req_batch * 2) {
@@ -846,7 +825,6 @@
 	}
 
 	local_irq_restore(flags);
-	preempt_enable();
 }
 
 static bool get_reqs_available(struct kioctx *ctx)
@@ -855,10 +833,8 @@
 	bool ret = false;
 	unsigned long flags;
 
-	preempt_disable();
-	kcpu = this_cpu_ptr(ctx->cpu);
-
 	local_irq_save(flags);
+	kcpu = this_cpu_ptr(ctx->cpu);
 	if (!kcpu->reqs_available) {
 		int old, avail = atomic_read(&ctx->reqs_available);
 
@@ -878,7 +854,6 @@
 	kcpu->reqs_available--;
 out:
 	local_irq_restore(flags);
-	preempt_enable();
 	return ret;
 }
 
@@ -1047,7 +1022,7 @@
 }
 EXPORT_SYMBOL(aio_complete);
 
-/* aio_read_events
+/* aio_read_events_ring
  *	Pull an event off of the ioctx's event ring.  Returns the number of
  *	events fetched
  */
@@ -1270,12 +1245,12 @@
 	if (compat)
 		ret = compat_rw_copy_check_uvector(rw,
 				(struct compat_iovec __user *)buf,
-				*nr_segs, 1, *iovec, iovec);
+				*nr_segs, UIO_FASTIOV, *iovec, iovec);
 	else
 #endif
 		ret = rw_copy_check_uvector(rw,
 				(struct iovec __user *)buf,
-				*nr_segs, 1, *iovec, iovec);
+				*nr_segs, UIO_FASTIOV, *iovec, iovec);
 	if (ret < 0)
 		return ret;
 
@@ -1299,9 +1274,8 @@
 }
 
 /*
- * aio_setup_iocb:
- *	Performs the initial checks and aio retry method
- *	setup for the kiocb at the time of io submission.
+ * aio_run_iocb:
+ *	Performs the initial checks and io submission.
  */
 static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode,
 			    char __user *buf, bool compat)
@@ -1313,7 +1287,7 @@
 	fmode_t mode;
 	aio_rw_op *rw_op;
 	rw_iter_op *iter_op;
-	struct iovec inline_vec, *iovec = &inline_vec;
+	struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs;
 	struct iov_iter iter;
 
 	switch (opcode) {
@@ -1348,7 +1322,7 @@
 		if (!ret)
 			ret = rw_verify_area(rw, file, &req->ki_pos, req->ki_nbytes);
 		if (ret < 0) {
-			if (iovec != &inline_vec)
+			if (iovec != inline_vecs)
 				kfree(iovec);
 			return ret;
 		}
@@ -1395,7 +1369,7 @@
 		return -EINVAL;
 	}
 
-	if (iovec != &inline_vec)
+	if (iovec != inline_vecs)
 		kfree(iovec);
 
 	if (ret != -EIOCBQUEUED) {
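
Two independent cleanups in this aio hunk are worth spelling out. First,
put_reqs_available()/get_reqs_available() now fetch the per-cpu pointer only
after interrupts are off: with IRQs disabled the task cannot migrate, so the
separate preempt_disable()/preempt_enable() pair becomes redundant once the
pointer is taken inside the IRQs-off region. The resulting pattern,
distilled:

	local_irq_save(flags);
	kcpu = this_cpu_ptr(ctx->cpu);	/* safe: no migration with IRQs off */
	kcpu->reqs_available += nr;
	local_irq_restore(flags);

Second, aio_setup_vectored_rw() was passing 1 as the fast-iovec capacity and
a single on-stack struct iovec; with UIO_FASTIOV (8) inline entries, short
vectored I/O now avoids a kmalloc() per request.
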
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index e25564b..54a201d 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -276,9 +276,8 @@
 			}
 			if (ret > 0)
 				goto next;
-			ret = ulist_add_merge(parents, eb->start,
-					      (uintptr_t)eie,
-					      (u64 *)&old, GFP_NOFS);
+			ret = ulist_add_merge_ptr(parents, eb->start,
+						  eie, (void **)&old, GFP_NOFS);
 			if (ret < 0)
 				break;
 			if (!ret && extent_item_pos) {
@@ -1001,16 +1000,19 @@
 					ret = -EIO;
 					goto out;
 				}
+				btrfs_tree_read_lock(eb);
+				btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
 				ret = find_extent_in_eb(eb, bytenr,
 							*extent_item_pos, &eie);
+				btrfs_tree_read_unlock_blocking(eb);
 				free_extent_buffer(eb);
 				if (ret < 0)
 					goto out;
 				ref->inode_list = eie;
 			}
-			ret = ulist_add_merge(refs, ref->parent,
-					      (uintptr_t)ref->inode_list,
-					      (u64 *)&eie, GFP_NOFS);
+			ret = ulist_add_merge_ptr(refs, ref->parent,
+						  ref->inode_list,
+						  (void **)&eie, GFP_NOFS);
 			if (ret < 0)
 				goto out;
 			if (!ret && extent_item_pos) {
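
ulist_add_merge() traffics in u64 aux values, and the old (u64 *)&old casts
let it store 8 bytes through what is only a 4-byte pointer slot on 32-bit.
The series wraps this in a pointer-safe helper; its shape is roughly the
following (a sketch of the fs/btrfs/ulist.h helper, bouncing through a
properly sized temporary on 32-bit):

	static inline int ulist_add_merge_ptr(struct ulist *ulist, u64 val,
					      void *aux, void **old_aux,
					      gfp_t gfp_mask)
	{
	#if BITS_PER_LONG == 32
		u64 old64 = (uintptr_t)*old_aux;
		int ret = ulist_add_merge(ulist, val, (uintptr_t)aux,
					  &old64, gfp_mask);
		*old_aux = (void *)(uintptr_t)old64;
		return ret;
	#else
		return ulist_add_merge(ulist, val, (u64)aux,
				       (u64 *)old_aux, gfp_mask);
	#endif
	}
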
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 4794923..43527fd 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -84,12 +84,6 @@
 	 */
 	struct list_head delalloc_inodes;
 
-	/*
-	 * list for tracking inodes that must be sent to disk before a
-	 * rename or truncate commit
-	 */
-	struct list_head ordered_operations;
-
 	/* node for the red-black tree that links inodes in subvolume root */
 	struct rb_node rb_node;
 
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index aeab453..44ee5d2 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -280,9 +280,9 @@
 
 	WARN_ON(btrfs_header_generation(buf) > trans->transid);
 	if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
-		ret = btrfs_inc_ref(trans, root, cow, 1, 1);
+		ret = btrfs_inc_ref(trans, root, cow, 1);
 	else
-		ret = btrfs_inc_ref(trans, root, cow, 0, 1);
+		ret = btrfs_inc_ref(trans, root, cow, 0);
 
 	if (ret)
 		return ret;
@@ -1035,14 +1035,14 @@
 		if ((owner == root->root_key.objectid ||
 		     root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
 		    !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
-			ret = btrfs_inc_ref(trans, root, buf, 1, 1);
+			ret = btrfs_inc_ref(trans, root, buf, 1);
 			BUG_ON(ret); /* -ENOMEM */
 
 			if (root->root_key.objectid ==
 			    BTRFS_TREE_RELOC_OBJECTID) {
-				ret = btrfs_dec_ref(trans, root, buf, 0, 1);
+				ret = btrfs_dec_ref(trans, root, buf, 0);
 				BUG_ON(ret); /* -ENOMEM */
-				ret = btrfs_inc_ref(trans, root, cow, 1, 1);
+				ret = btrfs_inc_ref(trans, root, cow, 1);
 				BUG_ON(ret); /* -ENOMEM */
 			}
 			new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
@@ -1050,9 +1050,9 @@
 
 			if (root->root_key.objectid ==
 			    BTRFS_TREE_RELOC_OBJECTID)
-				ret = btrfs_inc_ref(trans, root, cow, 1, 1);
+				ret = btrfs_inc_ref(trans, root, cow, 1);
 			else
-				ret = btrfs_inc_ref(trans, root, cow, 0, 1);
+				ret = btrfs_inc_ref(trans, root, cow, 0);
 			BUG_ON(ret); /* -ENOMEM */
 		}
 		if (new_flags != 0) {
@@ -1069,11 +1069,11 @@
 		if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) {
 			if (root->root_key.objectid ==
 			    BTRFS_TREE_RELOC_OBJECTID)
-				ret = btrfs_inc_ref(trans, root, cow, 1, 1);
+				ret = btrfs_inc_ref(trans, root, cow, 1);
 			else
-				ret = btrfs_inc_ref(trans, root, cow, 0, 1);
+				ret = btrfs_inc_ref(trans, root, cow, 0);
 			BUG_ON(ret); /* -ENOMEM */
-			ret = btrfs_dec_ref(trans, root, buf, 1, 1);
+			ret = btrfs_dec_ref(trans, root, buf, 1);
 			BUG_ON(ret); /* -ENOMEM */
 		}
 		clean_tree_block(trans, root, buf);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index be91397..8e29b61 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3326,9 +3326,9 @@
 			 u64 min_alloc_size, u64 empty_size, u64 hint_byte,
 			 struct btrfs_key *ins, int is_data, int delalloc);
 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-		  struct extent_buffer *buf, int full_backref, int no_quota);
+		  struct extent_buffer *buf, int full_backref);
 int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-		  struct extent_buffer *buf, int full_backref, int no_quota);
+		  struct extent_buffer *buf, int full_backref);
 int btrfs_set_disk_extent_flags(struct btrfs_trans_handle *trans,
 				struct btrfs_root *root,
 				u64 bytenr, u64 num_bytes, u64 flags,
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 08e65e9..d0ed9e6 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -60,8 +60,6 @@
 static void free_fs_root(struct btrfs_root *root);
 static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
 				    int read_only);
-static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
-					     struct btrfs_root *root);
 static void btrfs_destroy_ordered_extents(struct btrfs_root *root);
 static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans,
 				      struct btrfs_root *root);
@@ -3829,34 +3827,6 @@
 	btrfs_cleanup_transaction(root);
 }
 
-static void btrfs_destroy_ordered_operations(struct btrfs_transaction *t,
-					     struct btrfs_root *root)
-{
-	struct btrfs_inode *btrfs_inode;
-	struct list_head splice;
-
-	INIT_LIST_HEAD(&splice);
-
-	mutex_lock(&root->fs_info->ordered_operations_mutex);
-	spin_lock(&root->fs_info->ordered_root_lock);
-
-	list_splice_init(&t->ordered_operations, &splice);
-	while (!list_empty(&splice)) {
-		btrfs_inode = list_entry(splice.next, struct btrfs_inode,
-					 ordered_operations);
-
-		list_del_init(&btrfs_inode->ordered_operations);
-		spin_unlock(&root->fs_info->ordered_root_lock);
-
-		btrfs_invalidate_inodes(btrfs_inode->root);
-
-		spin_lock(&root->fs_info->ordered_root_lock);
-	}
-
-	spin_unlock(&root->fs_info->ordered_root_lock);
-	mutex_unlock(&root->fs_info->ordered_operations_mutex);
-}
-
 static void btrfs_destroy_ordered_extents(struct btrfs_root *root)
 {
 	struct btrfs_ordered_extent *ordered;
@@ -4093,8 +4063,6 @@
 void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans,
 				   struct btrfs_root *root)
 {
-	btrfs_destroy_ordered_operations(cur_trans, root);
-
 	btrfs_destroy_delayed_refs(cur_trans, root);
 
 	cur_trans->state = TRANS_STATE_COMMIT_START;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 813537f..102ed31 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3057,7 +3057,7 @@
 static int __btrfs_mod_ref(struct btrfs_trans_handle *trans,
 			   struct btrfs_root *root,
 			   struct extent_buffer *buf,
-			   int full_backref, int inc, int no_quota)
+			   int full_backref, int inc)
 {
 	u64 bytenr;
 	u64 num_bytes;
@@ -3111,7 +3111,7 @@
 			key.offset -= btrfs_file_extent_offset(buf, fi);
 			ret = process_func(trans, root, bytenr, num_bytes,
 					   parent, ref_root, key.objectid,
-					   key.offset, no_quota);
+					   key.offset, 1);
 			if (ret)
 				goto fail;
 		} else {
@@ -3119,7 +3119,7 @@
 			num_bytes = btrfs_level_size(root, level - 1);
 			ret = process_func(trans, root, bytenr, num_bytes,
 					   parent, ref_root, level - 1, 0,
-					   no_quota);
+					   1);
 			if (ret)
 				goto fail;
 		}
@@ -3130,15 +3130,15 @@
 }
 
 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-		  struct extent_buffer *buf, int full_backref, int no_quota)
+		  struct extent_buffer *buf, int full_backref)
 {
-	return __btrfs_mod_ref(trans, root, buf, full_backref, 1, no_quota);
+	return __btrfs_mod_ref(trans, root, buf, full_backref, 1);
 }
 
 int btrfs_dec_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
-		  struct extent_buffer *buf, int full_backref, int no_quota)
+		  struct extent_buffer *buf, int full_backref)
 {
-	return __btrfs_mod_ref(trans, root, buf, full_backref, 0, no_quota);
+	return __btrfs_mod_ref(trans, root, buf, full_backref, 0);
 }
 
 static int write_one_cache_group(struct btrfs_trans_handle *trans,
@@ -7478,6 +7478,220 @@
 	wc->reada_slot = slot;
 }
 
+static int account_leaf_items(struct btrfs_trans_handle *trans,
+			      struct btrfs_root *root,
+			      struct extent_buffer *eb)
+{
+	int nr = btrfs_header_nritems(eb);
+	int i, extent_type, ret;
+	struct btrfs_key key;
+	struct btrfs_file_extent_item *fi;
+	u64 bytenr, num_bytes;
+
+	for (i = 0; i < nr; i++) {
+		btrfs_item_key_to_cpu(eb, &key, i);
+
+		if (key.type != BTRFS_EXTENT_DATA_KEY)
+			continue;
+
+		fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
+		/* filter out non-qgroup-accountable extents */
+		extent_type = btrfs_file_extent_type(eb, fi);
+
+		if (extent_type == BTRFS_FILE_EXTENT_INLINE)
+			continue;
+
+		bytenr = btrfs_file_extent_disk_bytenr(eb, fi);
+		if (!bytenr)
+			continue;
+
+		num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi);
+
+		ret = btrfs_qgroup_record_ref(trans, root->fs_info,
+					      root->objectid,
+					      bytenr, num_bytes,
+					      BTRFS_QGROUP_OPER_SUB_SUBTREE, 0);
+		if (ret)
+			return ret;
+	}
+	return 0;
+}
+
+/*
+ * Walk up the tree from the bottom, freeing leaves and any interior
+ * nodes which have had all slots visited. If a node (leaf or
+ * interior) is freed, the node above it will have its slot
+ * incremented. The root node will never be freed.
+ *
+ * At the end of this function, we should have a path which has all
+ * slots incremented to the next position for a search. If we need to
+ * read a new node, it will be NULL and the node above it will have the
+ * correct slot selected for a later read.
+ *
+ * If we increment the root node's slot counter past the number of
+ * elements, 1 is returned to signal completion of the search.
+ */
+static int adjust_slots_upwards(struct btrfs_root *root,
+				struct btrfs_path *path, int root_level)
+{
+	int level = 0;
+	int nr, slot;
+	struct extent_buffer *eb;
+
+	if (root_level == 0)
+		return 1;
+
+	while (level <= root_level) {
+		eb = path->nodes[level];
+		nr = btrfs_header_nritems(eb);
+		path->slots[level]++;
+		slot = path->slots[level];
+		if (slot >= nr || level == 0) {
+			/*
+			 * Don't free the root - we will detect this
+			 * condition after our loop and return a
+			 * positive value for caller to stop walking the tree.
+			 */
+			if (level != root_level) {
+				btrfs_tree_unlock_rw(eb, path->locks[level]);
+				path->locks[level] = 0;
+
+				free_extent_buffer(eb);
+				path->nodes[level] = NULL;
+				path->slots[level] = 0;
+			}
+		} else {
+			/*
+			 * We have a valid slot to walk back down
+			 * from. Stop here so caller can process these
+			 * new nodes.
+			 */
+			break;
+		}
+
+		level++;
+	}
+
+	eb = path->nodes[root_level];
+	if (path->slots[root_level] >= btrfs_header_nritems(eb))
+		return 1;
+
+	return 0;
+}
+
+/*
+ * root_eb is the subtree root and is locked before this function is called.
+ */
+static int account_shared_subtree(struct btrfs_trans_handle *trans,
+				  struct btrfs_root *root,
+				  struct extent_buffer *root_eb,
+				  u64 root_gen,
+				  int root_level)
+{
+	int ret = 0;
+	int level;
+	struct extent_buffer *eb = root_eb;
+	struct btrfs_path *path = NULL;
+
+	BUG_ON(root_level < 0 || root_level > BTRFS_MAX_LEVEL);
+	BUG_ON(root_eb == NULL);
+
+	if (!root->fs_info->quota_enabled)
+		return 0;
+
+	if (!extent_buffer_uptodate(root_eb)) {
+		ret = btrfs_read_buffer(root_eb, root_gen);
+		if (ret)
+			goto out;
+	}
+
+	if (root_level == 0) {
+		ret = account_leaf_items(trans, root, root_eb);
+		goto out;
+	}
+
+	path = btrfs_alloc_path();
+	if (!path)
+		return -ENOMEM;
+
+	/*
+	 * Walk down the tree.  Missing extent blocks are filled in as
+	 * we go. Metadata is accounted every time we read a new
+	 * extent block.
+	 *
+	 * When we reach a leaf, we account for file extent items in it,
+	 * walk back up the tree (adjusting slot pointers as we go)
+	 * and restart the search process.
+	 */
+	extent_buffer_get(root_eb); /* For path */
+	path->nodes[root_level] = root_eb;
+	path->slots[root_level] = 0;
+	path->locks[root_level] = 0; /* so release_path doesn't try to unlock */
+walk_down:
+	level = root_level;
+	while (level >= 0) {
+		if (path->nodes[level] == NULL) {
+			int child_bsize = root->nodesize;
+			int parent_slot;
+			u64 child_gen;
+			u64 child_bytenr;
+
+			/* We need to get child blockptr/gen from
+			 * parent before we can read it. */
+			eb = path->nodes[level + 1];
+			parent_slot = path->slots[level + 1];
+			child_bytenr = btrfs_node_blockptr(eb, parent_slot);
+			child_gen = btrfs_node_ptr_generation(eb, parent_slot);
+
+			eb = read_tree_block(root, child_bytenr, child_bsize,
+					     child_gen);
+			if (!eb || !extent_buffer_uptodate(eb)) {
+				ret = -EIO;
+				goto out;
+			}
+
+			path->nodes[level] = eb;
+			path->slots[level] = 0;
+
+			btrfs_tree_read_lock(eb);
+			btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
+			path->locks[level] = BTRFS_READ_LOCK_BLOCKING;
+
+			ret = btrfs_qgroup_record_ref(trans, root->fs_info,
+						root->objectid,
+						child_bytenr,
+						child_bsize,
+						BTRFS_QGROUP_OPER_SUB_SUBTREE,
+						0);
+			if (ret)
+				goto out;
+
+		}
+
+		if (level == 0) {
+			ret = account_leaf_items(trans, root, path->nodes[level]);
+			if (ret)
+				goto out;
+
+			/* Nonzero return here means we completed our search */
+			ret = adjust_slots_upwards(root, path, root_level);
+			if (ret)
+				break;
+
+			/* Restart search with new slots */
+			goto walk_down;
+		}
+
+		level--;
+	}
+
+	ret = 0;
+out:
+	btrfs_free_path(path);
+
+	return ret;
+}
+
 /*
  * helper to process tree block while walking down the tree.
  *
@@ -7532,9 +7746,9 @@
 	/* wc->stage == UPDATE_BACKREF */
 	if (!(wc->flags[level] & flag)) {
 		BUG_ON(!path->locks[level]);
-		ret = btrfs_inc_ref(trans, root, eb, 1, wc->for_reloc);
+		ret = btrfs_inc_ref(trans, root, eb, 1);
 		BUG_ON(ret); /* -ENOMEM */
-		ret = btrfs_dec_ref(trans, root, eb, 0, wc->for_reloc);
+		ret = btrfs_dec_ref(trans, root, eb, 0);
 		BUG_ON(ret); /* -ENOMEM */
 		ret = btrfs_set_disk_extent_flags(trans, root, eb->start,
 						  eb->len, flag,
@@ -7581,6 +7795,7 @@
 	int level = wc->level;
 	int reada = 0;
 	int ret = 0;
+	bool need_account = false;
 
 	generation = btrfs_node_ptr_generation(path->nodes[level],
 					       path->slots[level]);
@@ -7626,6 +7841,7 @@
 
 	if (wc->stage == DROP_REFERENCE) {
 		if (wc->refs[level - 1] > 1) {
+			need_account = true;
 			if (level == 1 &&
 			    (wc->flags[0] & BTRFS_BLOCK_FLAG_FULL_BACKREF))
 				goto skip;
@@ -7689,6 +7905,16 @@
 			parent = 0;
 		}
 
+		if (need_account) {
+			ret = account_shared_subtree(trans, root, next,
+						     generation, level - 1);
+			if (ret) {
+				printk_ratelimited(KERN_ERR "BTRFS: %s Error "
+					"%d accounting shared subtree. Quota "
+					"is out of sync, rescan required.\n",
+					root->fs_info->sb->s_id, ret);
+			}
+		}
 		ret = btrfs_free_extent(trans, root, bytenr, blocksize, parent,
 				root->root_key.objectid, level - 1, 0, 0);
 		BUG_ON(ret); /* -ENOMEM */
@@ -7769,12 +7995,17 @@
 	if (wc->refs[level] == 1) {
 		if (level == 0) {
 			if (wc->flags[level] & BTRFS_BLOCK_FLAG_FULL_BACKREF)
-				ret = btrfs_dec_ref(trans, root, eb, 1,
-						    wc->for_reloc);
+				ret = btrfs_dec_ref(trans, root, eb, 1);
 			else
-				ret = btrfs_dec_ref(trans, root, eb, 0,
-						    wc->for_reloc);
+				ret = btrfs_dec_ref(trans, root, eb, 0);
 			BUG_ON(ret); /* -ENOMEM */
+			ret = account_leaf_items(trans, root, eb);
+			if (ret) {
+				printk_ratelimited(KERN_ERR "BTRFS: %s Error "
+					"%d accounting leaf items. Quota "
+					"is out of sync, rescan required.\n",
+					root->fs_info->sb->s_id, ret);
+			}
 		}
 		/* make block locked assertion in clean_tree_block happy */
 		if (!path->locks[level] &&
@@ -7900,6 +8131,8 @@
 	int level;
 	bool root_dropped = false;
 
+	btrfs_debug(root->fs_info, "Drop subvolume %llu", root->objectid);
+
 	path = btrfs_alloc_path();
 	if (!path) {
 		err = -ENOMEM;
@@ -8025,6 +8258,24 @@
 				goto out_end_trans;
 			}
 
+			/*
+			 * Qgroup update accounting is run from
+			 * delayed ref handling. This usually works
+			 * out because delayed refs are normally the
+			 * only way qgroup updates are added. However,
+			 * we may have added updates during our tree
+			 * walk so run qgroups here to make sure we
+			 * don't lose any updates.
+			 */
+			ret = btrfs_delayed_qgroup_accounting(trans,
+							      root->fs_info);
+			if (ret)
+				printk_ratelimited(KERN_ERR "BTRFS: Failure %d "
+						   "running qgroup updates "
+						   "during snapshot delete. "
+						   "Quota is out of sync, "
+						   "rescan required.\n", ret);
+
 			btrfs_end_transaction_throttle(trans, tree_root);
 			if (!for_reloc && btrfs_need_cleaner_sleep(root)) {
 				pr_debug("BTRFS: drop snapshot early exit\n");
@@ -8078,6 +8329,14 @@
 	}
 	root_dropped = true;
 out_end_trans:
+	ret = btrfs_delayed_qgroup_accounting(trans, tree_root->fs_info);
+	if (ret)
+		printk_ratelimited(KERN_ERR "BTRFS: Failure %d "
+				   "running qgroup updates "
+				   "during snapshot delete. "
+				   "Quota is out of sync, "
+				   "rescan required.\n", ret);
+
 	btrfs_end_transaction_throttle(trans, tree_root);
 out_free:
 	kfree(wc);
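
account_shared_subtree() and adjust_slots_upwards() above traverse a subtree
without recursion: an explicit array of (node, slot) pairs records the path,
the walk descends to a leaf, accounts its items, and then bumps slots upward
until some level still has an unvisited child. A self-contained userspace
sketch of that walk follows; the toy struct node is an assumption for
illustration, not a btrfs type:

#include <stdio.h>

#define MAX_LEVEL 8

struct node {
	int nr;			/* populated slots in this node */
	struct node *child[4];	/* used when level > 0 */
	int item[4];		/* used at level 0 (leaf items) */
};

/* Returns 1 once the root's slots are exhausted, 0 otherwise. */
static int adjust_slots_upwards(struct node **nodes, int *slots,
				int root_level)
{
	int level;

	for (level = 0; level <= root_level; level++) {
		slots[level]++;
		if (slots[level] >= nodes[level]->nr || level == 0) {
			/* node exhausted (a visited leaf always is) */
			if (level != root_level) {
				nodes[level] = NULL;
				slots[level] = 0;
			}
		} else {
			break;	/* valid slot to walk back down from */
		}
	}
	return slots[root_level] >= nodes[root_level]->nr;
}

static void walk(struct node *root, int root_level)
{
	struct node *nodes[MAX_LEVEL] = { NULL };
	int slots[MAX_LEVEL] = { 0 };
	int level, i;

	if (root_level == 0) {		/* lone leaf: nothing to walk */
		for (i = 0; i < root->nr; i++)
			printf("item %d\n", root->item[i]);
		return;
	}

	nodes[root_level] = root;
walk_down:
	for (level = root_level; level >= 0; level--) {
		if (!nodes[level]) {
			/* read the child selected by the parent's slot */
			nodes[level] = nodes[level + 1]->child[slots[level + 1]];
			slots[level] = 0;
		}
		if (level == 0) {
			/* "account" every item in the leaf, then move on */
			for (i = 0; i < nodes[0]->nr; i++)
				printf("item %d\n", nodes[0]->item[i]);
			if (adjust_slots_upwards(nodes, slots, root_level))
				return;	/* search complete */
			goto walk_down;
		}
	}
}

int main(void)
{
	struct node leaf1 = { .nr = 2, .item = { 1, 2 } };
	struct node leaf2 = { .nr = 1, .item = { 3 } };
	struct node root  = { .nr = 2, .child = { &leaf1, &leaf2 } };

	walk(&root, 1);		/* prints items 1, 2, 3 */
	return 0;
}
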
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index f46cfe4..54c84da 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -756,7 +756,7 @@
 				found_next = 1;
 			if (ret != 0)
 				goto insert;
-			slot = 0;
+			slot = path->slots[0];
 		}
 		btrfs_item_key_to_cpu(path->nodes[0], &found_key, slot);
 		if (found_key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 1f2b99c..d3afac2 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -1838,33 +1838,9 @@
 
 int btrfs_release_file(struct inode *inode, struct file *filp)
 {
-	/*
-	 * ordered_data_close is set by setattr when we are about to truncate
-	 * a file from a non-zero size to a zero size.  This tries to
-	 * flush down new bytes that may have been written if the
-	 * application were using truncate to replace a file in place.
-	 */
-	if (test_and_clear_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
-			       &BTRFS_I(inode)->runtime_flags)) {
-		struct btrfs_trans_handle *trans;
-		struct btrfs_root *root = BTRFS_I(inode)->root;
-
-		/*
-		 * We need to block on a committing transaction to keep us from
-		 * throwing an ordered operation onto the list and causing
-		 * something like sync to deadlock trying to flush out this
-		 * inode.
-		 */
-		trans = btrfs_start_transaction(root, 0);
-		if (IS_ERR(trans))
-			return PTR_ERR(trans);
-		btrfs_add_ordered_operation(trans, BTRFS_I(inode)->root, inode);
-		btrfs_end_transaction(trans, root);
-		if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
-			filemap_flush(inode->i_mapping);
-	}
 	if (filp->private_data)
 		btrfs_ioctl_trans_end(filp);
+	filemap_flush(inode->i_mapping);
 	return 0;
 }
 
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3183742..03708ef 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -709,6 +709,18 @@
 				unlock_extent(io_tree, async_extent->start,
 					      async_extent->start +
 					      async_extent->ram_size - 1);
+
+				/*
+				 * we need to redirty the pages if we decide to
+				 * fall back to uncompressed IO, otherwise we
+				 * will not submit these pages down to lower
+				 * layers.
+				 */
+				extent_range_redirty_for_io(inode,
+						async_extent->start,
+						async_extent->start +
+						async_extent->ram_size - 1);
+
 				goto retry;
 			}
 			goto out_free;
@@ -7939,27 +7951,6 @@
 	BUG_ON(ret);
 
 	/*
-	 * setattr is responsible for setting the ordered_data_close flag,
-	 * but that is only tested during the last file release.  That
-	 * could happen well after the next commit, leaving a great big
-	 * window where new writes may get lost if someone chooses to write
-	 * to this file after truncating to zero
-	 *
-	 * The inode doesn't have any dirty data here, and so if we commit
-	 * this is a noop.  If someone immediately starts writing to the inode
-	 * it is very likely we'll catch some of their writes in this
-	 * transaction, and the commit will find this file on the ordered
-	 * data list with good things to send down.
-	 *
-	 * This is a best effort solution, there is still a window where
-	 * using truncate to replace the contents of the file will
-	 * end up with a zero length file after a crash.
-	 */
-	if (inode->i_size == 0 && test_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
-					   &BTRFS_I(inode)->runtime_flags))
-		btrfs_add_ordered_operation(trans, root, inode);
-
-	/*
 	 * So if we truncate and then write and fsync we normally would just
 	 * write the extents that changed, which is a problem if we need to
 	 * first truncate that entire inode.  So set this flag so we write out
@@ -8106,7 +8097,6 @@
 	mutex_init(&ei->delalloc_mutex);
 	btrfs_ordered_inode_tree_init(&ei->ordered_tree);
 	INIT_LIST_HEAD(&ei->delalloc_inodes);
-	INIT_LIST_HEAD(&ei->ordered_operations);
 	RB_CLEAR_NODE(&ei->rb_node);
 
 	return inode;
@@ -8146,17 +8136,6 @@
 	if (!root)
 		goto free;
 
-	/*
-	 * Make sure we're properly removed from the ordered operation
-	 * lists.
-	 */
-	smp_mb();
-	if (!list_empty(&BTRFS_I(inode)->ordered_operations)) {
-		spin_lock(&root->fs_info->ordered_root_lock);
-		list_del_init(&BTRFS_I(inode)->ordered_operations);
-		spin_unlock(&root->fs_info->ordered_root_lock);
-	}
-
 	if (test_bit(BTRFS_INODE_HAS_ORPHAN_ITEM,
 		     &BTRFS_I(inode)->runtime_flags)) {
 		btrfs_info(root->fs_info, "inode %llu still on the orphan list",
@@ -8338,12 +8317,10 @@
 	ret = 0;
 
 	/*
-	 * we're using rename to replace one file with another.
-	 * and the replacement file is large.  Start IO on it now so
-	 * we don't add too much work to the end of the transaction
+	 * we're using rename to replace one file with another.  Start IO on it
+	 * now so we don't add too much work to the end of the transaction
 	 */
-	if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size &&
-	    old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
+	if (new_inode && S_ISREG(old_inode->i_mode) && new_inode->i_size)
 		filemap_flush(old_inode->i_mapping);
 
 	/* close the racy window with snapshot create/destroy ioctl */
@@ -8391,12 +8368,6 @@
 		 */
 		btrfs_pin_log_trans(root);
 	}
-	/*
-	 * make sure the inode gets flushed if it is replacing
-	 * something.
-	 */
-	if (new_inode && new_inode->i_size && S_ISREG(old_inode->i_mode))
-		btrfs_add_ordered_operation(trans, root, old_inode);
 
 	inode_inc_iversion(old_dir);
 	inode_inc_iversion(new_dir);
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 7187b14..963895c 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -571,18 +571,6 @@
 
 	trace_btrfs_ordered_extent_remove(inode, entry);
 
-	/*
-	 * we have no more ordered extents for this inode and
-	 * no dirty pages.  We can safely remove it from the
-	 * list of ordered extents
-	 */
-	if (RB_EMPTY_ROOT(&tree->tree) &&
-	    !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) {
-		spin_lock(&root->fs_info->ordered_root_lock);
-		list_del_init(&BTRFS_I(inode)->ordered_operations);
-		spin_unlock(&root->fs_info->ordered_root_lock);
-	}
-
 	if (!root->nr_ordered_extents) {
 		spin_lock(&root->fs_info->ordered_root_lock);
 		BUG_ON(list_empty(&root->ordered_root));
@@ -687,81 +675,6 @@
 }
 
 /*
- * this is used during transaction commit to write all the inodes
- * added to the ordered operation list.  These files must be fully on
- * disk before the transaction commits.
- *
- * we have two modes here, one is to just start the IO via filemap_flush
- * and the other is to wait for all the io.  When we wait, we have an
- * extra check to make sure the ordered operation list really is empty
- * before we return
- */
-int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
-				 struct btrfs_root *root, int wait)
-{
-	struct btrfs_inode *btrfs_inode;
-	struct inode *inode;
-	struct btrfs_transaction *cur_trans = trans->transaction;
-	struct list_head splice;
-	struct list_head works;
-	struct btrfs_delalloc_work *work, *next;
-	int ret = 0;
-
-	INIT_LIST_HEAD(&splice);
-	INIT_LIST_HEAD(&works);
-
-	mutex_lock(&root->fs_info->ordered_extent_flush_mutex);
-	spin_lock(&root->fs_info->ordered_root_lock);
-	list_splice_init(&cur_trans->ordered_operations, &splice);
-	while (!list_empty(&splice)) {
-		btrfs_inode = list_entry(splice.next, struct btrfs_inode,
-				   ordered_operations);
-		inode = &btrfs_inode->vfs_inode;
-
-		list_del_init(&btrfs_inode->ordered_operations);
-
-		/*
-		 * the inode may be getting freed (in sys_unlink path).
-		 */
-		inode = igrab(inode);
-		if (!inode)
-			continue;
-
-		if (!wait)
-			list_add_tail(&BTRFS_I(inode)->ordered_operations,
-				      &cur_trans->ordered_operations);
-		spin_unlock(&root->fs_info->ordered_root_lock);
-
-		work = btrfs_alloc_delalloc_work(inode, wait, 1);
-		if (!work) {
-			spin_lock(&root->fs_info->ordered_root_lock);
-			if (list_empty(&BTRFS_I(inode)->ordered_operations))
-				list_add_tail(&btrfs_inode->ordered_operations,
-					      &splice);
-			list_splice_tail(&splice,
-					 &cur_trans->ordered_operations);
-			spin_unlock(&root->fs_info->ordered_root_lock);
-			ret = -ENOMEM;
-			goto out;
-		}
-		list_add_tail(&work->list, &works);
-		btrfs_queue_work(root->fs_info->flush_workers,
-				 &work->work);
-
-		cond_resched();
-		spin_lock(&root->fs_info->ordered_root_lock);
-	}
-	spin_unlock(&root->fs_info->ordered_root_lock);
-out:
-	list_for_each_entry_safe(work, next, &works, list) {
-		list_del_init(&work->list);
-		btrfs_wait_and_free_delalloc_work(work);
-	}
-	mutex_unlock(&root->fs_info->ordered_extent_flush_mutex);
-	return ret;
-}
-
-/*
  * Used to start IO or wait for a given ordered extent to finish.
  *
  * If wait is one, this effectively waits on page writeback for all the pages
@@ -1120,42 +1033,6 @@
 	return index;
 }
 
-
-/*
- * add a given inode to the list of inodes that must be fully on
- * disk before a transaction commit finishes.
- *
- * This basically gives us the ext3 style data=ordered mode, and it is mostly
- * used to make sure renamed files are fully on disk.
- *
- * It is a noop if the inode is already fully on disk.
- *
- * If trans is not null, we'll do a friendly check for a transaction that
- * is already flushing things and force the IO down ourselves.
- */
-void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
-				 struct btrfs_root *root, struct inode *inode)
-{
-	struct btrfs_transaction *cur_trans = trans->transaction;
-	u64 last_mod;
-
-	last_mod = max(BTRFS_I(inode)->generation, BTRFS_I(inode)->last_trans);
-
-	/*
-	 * if this file hasn't been changed since the last transaction
-	 * commit, we can safely return without doing anything
-	 */
-	if (last_mod <= root->fs_info->last_trans_committed)
-		return;
-
-	spin_lock(&root->fs_info->ordered_root_lock);
-	if (list_empty(&BTRFS_I(inode)->ordered_operations)) {
-		list_add_tail(&BTRFS_I(inode)->ordered_operations,
-			      &cur_trans->ordered_operations);
-	}
-	spin_unlock(&root->fs_info->ordered_root_lock);
-}
-
 int __init ordered_data_init(void)
 {
 	btrfs_ordered_extent_cache = kmem_cache_create("btrfs_ordered_extent",
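
Both deleted helpers (btrfs_run_ordered_operations() above and
btrfs_destroy_ordered_operations() in disk-io.c) drain a shared list with the
splice-under-lock pattern: steal the whole list in O(1) while holding the
lock, then process entries with the lock dropped around any blocking work. A
minimal userspace sketch of the pattern, with a pthread mutex standing in for
ordered_root_lock and illustrative names throughout:

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct entry {
	struct entry *next;
	int id;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;
static struct entry *shared_head;	/* protected by list_lock */

static void drain_entries(void)
{
	struct entry *splice, *e;

	/* splice: steal the whole list in O(1) under the lock */
	pthread_mutex_lock(&list_lock);
	splice = shared_head;
	shared_head = NULL;
	pthread_mutex_unlock(&list_lock);

	/* process with the lock dropped; new entries can queue meanwhile */
	while ((e = splice) != NULL) {
		splice = e->next;
		printf("flushing entry %d\n", e->id);
		free(e);
	}
}

int main(void)
{
	for (int i = 0; i < 3; i++) {
		struct entry *e = malloc(sizeof(*e));
		e->id = i;
		pthread_mutex_lock(&list_lock);
		e->next = shared_head;
		shared_head = e;
		pthread_mutex_unlock(&list_lock);
	}
	drain_entries();
	return 0;
}
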
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 2468970..d81a274 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -190,11 +190,6 @@
 				struct btrfs_ordered_extent *ordered);
 int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr,
 			   u32 *sum, int len);
-int btrfs_run_ordered_operations(struct btrfs_trans_handle *trans,
-				 struct btrfs_root *root, int wait);
-void btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
-				 struct btrfs_root *root,
-				 struct inode *inode);
 int btrfs_wait_ordered_extents(struct btrfs_root *root, int nr);
 void btrfs_wait_ordered_roots(struct btrfs_fs_info *fs_info, int nr);
 void btrfs_get_logged_extents(struct inode *inode,
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 98cb6b2..b497498 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1201,6 +1201,50 @@
 	mutex_unlock(&fs_info->qgroup_ioctl_lock);
 	return ret;
 }
+
+static int comp_oper_exist(struct btrfs_qgroup_operation *oper1,
+			   struct btrfs_qgroup_operation *oper2)
+{
+	/*
+	 * Ignore seq and type here, we're looking for any operation
+	 * at all related to this extent on that root.
+	 */
+	if (oper1->bytenr < oper2->bytenr)
+		return -1;
+	if (oper1->bytenr > oper2->bytenr)
+		return 1;
+	if (oper1->ref_root < oper2->ref_root)
+		return -1;
+	if (oper1->ref_root > oper2->ref_root)
+		return 1;
+	return 0;
+}
+
+static int qgroup_oper_exists(struct btrfs_fs_info *fs_info,
+			      struct btrfs_qgroup_operation *oper)
+{
+	struct rb_node *n;
+	struct btrfs_qgroup_operation *cur;
+	int cmp;
+
+	spin_lock(&fs_info->qgroup_op_lock);
+	n = fs_info->qgroup_op_tree.rb_node;
+	while (n) {
+		cur = rb_entry(n, struct btrfs_qgroup_operation, n);
+		cmp = comp_oper_exist(cur, oper);
+		if (cmp < 0) {
+			n = n->rb_right;
+		} else if (cmp) {
+			n = n->rb_left;
+		} else {
+			spin_unlock(&fs_info->qgroup_op_lock);
+			return -EEXIST;
+		}
+	}
+	spin_unlock(&fs_info->qgroup_op_lock);
+	return 0;
+}
+
 static int comp_oper(struct btrfs_qgroup_operation *oper1,
 		     struct btrfs_qgroup_operation *oper2)
 {
@@ -1290,6 +1334,23 @@
 	oper->seq = atomic_inc_return(&fs_info->qgroup_op_seq);
 	INIT_LIST_HEAD(&oper->elem.list);
 	oper->elem.seq = 0;
+
+	if (type == BTRFS_QGROUP_OPER_SUB_SUBTREE) {
+		/*
+		 * If any operation for this bytenr/ref_root combo
+		 * exists, then we know it's not exclusively owned and
+		 * shouldn't be queued up.
+		 *
+		 * This also catches the case where we have a cloned
+		 * extent that gets queued up multiple times during
+		 * drop snapshot.
+		 */
+		if (qgroup_oper_exists(fs_info, oper)) {
+			kfree(oper);
+			return 0;
+		}
+	}
+
 	ret = insert_qgroup_oper(fs_info, oper);
 	if (ret) {
 		/* Shouldn't happen so have an assert for developers */
@@ -1884,6 +1945,111 @@
 }
 
 /*
+ * Process a reference to a shared subtree. This type of operation is
+ * queued during snapshot removal when we encounter extents which are
+ * shared between more than one root.
+ */
+static int qgroup_subtree_accounting(struct btrfs_trans_handle *trans,
+				     struct btrfs_fs_info *fs_info,
+				     struct btrfs_qgroup_operation *oper)
+{
+	struct ulist *roots = NULL;
+	struct ulist_node *unode;
+	struct ulist_iterator uiter;
+	struct btrfs_qgroup_list *glist;
+	struct ulist *parents;
+	int ret = 0;
+	int err;
+	struct btrfs_qgroup *qg;
+	u64 root_obj = 0;
+	struct seq_list elem = {};
+
+	parents = ulist_alloc(GFP_NOFS);
+	if (!parents)
+		return -ENOMEM;
+
+	btrfs_get_tree_mod_seq(fs_info, &elem);
+	ret = btrfs_find_all_roots(trans, fs_info, oper->bytenr,
+				   elem.seq, &roots);
+	btrfs_put_tree_mod_seq(fs_info, &elem);
+	if (ret < 0)
+		return ret;
+
+	if (roots->nnodes != 1)
+		goto out;
+
+	ULIST_ITER_INIT(&uiter);
+	unode = ulist_next(roots, &uiter); /* Only want 1 so no need to loop */
+	/*
+	 * If we find our ref root then that means all refs
+	 * this extent has to the root have not yet been
+	 * deleted. In that case, we do nothing and let the
+	 * last ref for this bytenr drive our update.
+	 *
+	 * This can happen for example if an extent is
+	 * referenced multiple times in a snapshot (clone,
+	 * etc). If we are in the middle of snapshot removal,
+	 * queued updates for such an extent will find the
+	 * root if we have not yet finished removing the
+	 * snapshot.
+	 */
+	if (unode->val == oper->ref_root)
+		goto out;
+
+	root_obj = unode->val;
+	BUG_ON(!root_obj);
+
+	spin_lock(&fs_info->qgroup_lock);
+	qg = find_qgroup_rb(fs_info, root_obj);
+	if (!qg)
+		goto out_unlock;
+
+	qg->excl += oper->num_bytes;
+	qg->excl_cmpr += oper->num_bytes;
+	qgroup_dirty(fs_info, qg);
+
+	/*
+	 * Adjust counts for parent groups. First we find all
+	 * parents, then in the 2nd loop we do the adjustment
+	 * while adding parents of the parents to our ulist.
+	 */
+	list_for_each_entry(glist, &qg->groups, next_group) {
+		err = ulist_add(parents, glist->group->qgroupid,
+				ptr_to_u64(glist->group), GFP_ATOMIC);
+		if (err < 0) {
+			ret = err;
+			goto out_unlock;
+		}
+	}
+
+	ULIST_ITER_INIT(&uiter);
+	while ((unode = ulist_next(parents, &uiter))) {
+		qg = u64_to_ptr(unode->aux);
+		qg->excl += oper->num_bytes;
+		qg->excl_cmpr += oper->num_bytes;
+		qgroup_dirty(fs_info, qg);
+
+		/* Add any parents of the parents */
+		list_for_each_entry(glist, &qg->groups, next_group) {
+			err = ulist_add(parents, glist->group->qgroupid,
+					ptr_to_u64(glist->group), GFP_ATOMIC);
+			if (err < 0) {
+				ret = err;
+				goto out_unlock;
+			}
+		}
+	}
+
+out_unlock:
+	spin_unlock(&fs_info->qgroup_lock);
+
+out:
+	ulist_free(roots);
+	ulist_free(parents);
+	return ret;
+}
+
+/*
  * btrfs_qgroup_account_ref is called for every ref that is added to or deleted
  * from the fs. First, all roots referencing the extent are searched, and
  * then the space is accounted accordingly to the different roots. The
@@ -1920,6 +2086,9 @@
 	case BTRFS_QGROUP_OPER_SUB_SHARED:
 		ret = qgroup_shared_accounting(trans, fs_info, oper);
 		break;
+	case BTRFS_QGROUP_OPER_SUB_SUBTREE:
+		ret = qgroup_subtree_accounting(trans, fs_info, oper);
+		break;
 	default:
 		ASSERT(0);
 	}
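
qgroup_oper_exists() above is a straight binary-search-tree lookup driven by
the two-key comparator comp_oper_exist(): bytenr orders first, ref_root
breaks ties, and seq/type are deliberately ignored so any queued operation on
the same extent/root pair matches. A self-contained sketch of the same shape,
with plain pointers standing in for the kernel rb-tree and an illustrative
struct:

#include <stdio.h>
#include <stdint.h>

struct oper {
	uint64_t bytenr;
	uint64_t ref_root;
	struct oper *left, *right;
};

/* bytenr orders first, ref_root breaks ties; seq/type ignored */
static int comp_oper_exist(const struct oper *a, const struct oper *b)
{
	if (a->bytenr != b->bytenr)
		return a->bytenr < b->bytenr ? -1 : 1;
	if (a->ref_root != b->ref_root)
		return a->ref_root < b->ref_root ? -1 : 1;
	return 0;
}

/* returns 1 if any operation on the same (bytenr, ref_root) exists */
static int oper_exists(const struct oper *root, const struct oper *key)
{
	while (root) {
		int cmp = comp_oper_exist(root, key);

		if (cmp < 0)
			root = root->right;	/* tree node too small */
		else if (cmp > 0)
			root = root->left;	/* tree node too large */
		else
			return 1;
	}
	return 0;
}

int main(void)
{
	struct oper a = { .bytenr = 100, .ref_root = 5 };
	struct oper root = { .bytenr = 200, .ref_root = 7, .left = &a };
	struct oper key = { .bytenr = 100, .ref_root = 5 };

	printf("exists: %d\n", oper_exists(&root, &key));	/* 1 */
	return 0;
}
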
diff --git a/fs/btrfs/qgroup.h b/fs/btrfs/qgroup.h
index 5952ff1..18cc68c 100644
--- a/fs/btrfs/qgroup.h
+++ b/fs/btrfs/qgroup.h
@@ -44,6 +44,7 @@
 	BTRFS_QGROUP_OPER_ADD_SHARED,
 	BTRFS_QGROUP_OPER_SUB_EXCL,
 	BTRFS_QGROUP_OPER_SUB_SHARED,
+	BTRFS_QGROUP_OPER_SUB_SUBTREE,
 };
 
 struct btrfs_qgroup_operation {
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 67b48b9..c4124de 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -1665,6 +1665,21 @@
 	return 0;
 }
 
+/*
+ * Calculate numbers for 'df', pessimistic in case of mixed raid profiles.
+ *
+ * If there's a redundant raid level at DATA block groups, use the respective
+ * multiplier to scale the sizes.
+ *
+ * Unused device space usage is based on simulating the chunk allocator
+ * algorithm that respects the device sizes, order of allocations and the
+ * 'alloc_start' value; this is a close approximation of the actual use, but
+ * there are other factors that may change the result (like a new metadata
+ * chunk).
+ *
+ * FIXME: not accurate for mixed block groups, total and free/used are ok,
+ * available appears slightly larger.
+ */
 static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
 	struct btrfs_fs_info *fs_info = btrfs_sb(dentry->d_sb);
@@ -1675,6 +1690,8 @@
 	u64 total_free_data = 0;
 	int bits = dentry->d_sb->s_blocksize_bits;
 	__be32 *fsid = (__be32 *)fs_info->fsid;
+	unsigned factor = 1;
+	struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv;
 	int ret;
 
 	/* holding chunk_mutex to avoid allocating new chunks */
@@ -1682,30 +1699,52 @@
 	rcu_read_lock();
 	list_for_each_entry_rcu(found, head, list) {
 		if (found->flags & BTRFS_BLOCK_GROUP_DATA) {
+			int i;
+
 			total_free_data += found->disk_total - found->disk_used;
 			total_free_data -=
 				btrfs_account_ro_block_groups_free_space(found);
+
+			for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) {
+				if (!list_empty(&found->block_groups[i])) {
+					switch (i) {
+					case BTRFS_RAID_DUP:
+					case BTRFS_RAID_RAID1:
+					case BTRFS_RAID_RAID10:
+						factor = 2;
+					}
+				}
+			}
 		}
 
 		total_used += found->disk_used;
 	}
+
 	rcu_read_unlock();
 
-	buf->f_namelen = BTRFS_NAME_LEN;
-	buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits;
-	buf->f_bfree = buf->f_blocks - (total_used >> bits);
-	buf->f_bsize = dentry->d_sb->s_blocksize;
-	buf->f_type = BTRFS_SUPER_MAGIC;
+	buf->f_blocks = div_u64(btrfs_super_total_bytes(disk_super), factor);
+	buf->f_blocks >>= bits;
+	buf->f_bfree = buf->f_blocks - (div_u64(total_used, factor) >> bits);
+
+	/* Account global block reserve as used, it's in logical size already */
+	spin_lock(&block_rsv->lock);
+	buf->f_bfree -= block_rsv->size >> bits;
+	spin_unlock(&block_rsv->lock);
+
 	buf->f_bavail = total_free_data;
 	ret = btrfs_calc_avail_data_space(fs_info->tree_root, &total_free_data);
 	if (ret) {
 		mutex_unlock(&fs_info->chunk_mutex);
 		return ret;
 	}
-	buf->f_bavail += total_free_data;
+	buf->f_bavail += div_u64(total_free_data, factor);
 	buf->f_bavail = buf->f_bavail >> bits;
 	mutex_unlock(&fs_info->chunk_mutex);
 
+	buf->f_type = BTRFS_SUPER_MAGIC;
+	buf->f_bsize = dentry->d_sb->s_blocksize;
+	buf->f_namelen = BTRFS_NAME_LEN;
+
 	/* We treat it as constant endianness (it doesn't matter _which_)
 	   because we want the fsid to come out the same whether mounted
 	   on a big-endian or little-endian host */
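
The new statfs() logic scales raw byte counts by a redundancy factor: with
DUP/RAID1/RAID10 data block groups every logical byte costs two on disk, so
totals and used space are divided by the factor before being converted to
blocks. A worked sketch of that arithmetic, with made-up numbers:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t total_bytes = 4ULL << 30;	/* 4 GiB of raw device space */
	uint64_t total_used  = 1ULL << 30;	/* 1 GiB raw bytes used */
	unsigned factor = 2;			/* RAID1: 2 copies of data */
	unsigned bits = 12;			/* 4 KiB statfs block size */
	uint64_t f_blocks, f_bfree;

	f_blocks = (total_bytes / factor) >> bits;
	f_bfree  = f_blocks - ((total_used / factor) >> bits);

	printf("blocks=%llu free=%llu\n",
	       (unsigned long long)f_blocks,
	       (unsigned long long)f_bfree);
	return 0;
}
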
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 5f379af..d89c6d3 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -218,7 +218,6 @@
 	spin_lock_init(&cur_trans->delayed_refs.lock);
 
 	INIT_LIST_HEAD(&cur_trans->pending_snapshots);
-	INIT_LIST_HEAD(&cur_trans->ordered_operations);
 	INIT_LIST_HEAD(&cur_trans->pending_chunks);
 	INIT_LIST_HEAD(&cur_trans->switch_commits);
 	list_add_tail(&cur_trans->list, &fs_info->trans_list);
@@ -1612,27 +1611,6 @@
 	kmem_cache_free(btrfs_trans_handle_cachep, trans);
 }
 
-static int btrfs_flush_all_pending_stuffs(struct btrfs_trans_handle *trans,
-					  struct btrfs_root *root)
-{
-	int ret;
-
-	ret = btrfs_run_delayed_items(trans, root);
-	if (ret)
-		return ret;
-
-	/*
-	 * rename doesn't use btrfs_join_transaction, so, once we
-	 * set the transaction to blocked above, we aren't going
-	 * to get any new ordered operations.  We can safely run
-	 * it here and know for sure that nothing new will be added
-	 * to the list
-	 */
-	ret = btrfs_run_ordered_operations(trans, root, 1);
-
-	return ret;
-}
-
 static inline int btrfs_start_delalloc_flush(struct btrfs_fs_info *fs_info)
 {
 	if (btrfs_test_opt(fs_info->tree_root, FLUSHONCOMMIT))
@@ -1653,13 +1631,6 @@
 	struct btrfs_transaction *prev_trans = NULL;
 	int ret;
 
-	ret = btrfs_run_ordered_operations(trans, root, 0);
-	if (ret) {
-		btrfs_abort_transaction(trans, root, ret);
-		btrfs_end_transaction(trans, root);
-		return ret;
-	}
-
 	/* Stop the commit early if ->aborted is set */
 	if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
 		ret = cur_trans->aborted;
@@ -1740,7 +1711,7 @@
 	if (ret)
 		goto cleanup_transaction;
 
-	ret = btrfs_flush_all_pending_stuffs(trans, root);
+	ret = btrfs_run_delayed_items(trans, root);
 	if (ret)
 		goto cleanup_transaction;
 
@@ -1748,7 +1719,7 @@
 		   extwriter_counter_read(cur_trans) == 0);
 
 	/* some pending stuff might be added after the previous flush. */
-	ret = btrfs_flush_all_pending_stuffs(trans, root);
+	ret = btrfs_run_delayed_items(trans, root);
 	if (ret)
 		goto cleanup_transaction;
 
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 7dd558e..579be51 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -55,7 +55,6 @@
 	wait_queue_head_t writer_wait;
 	wait_queue_head_t commit_wait;
 	struct list_head pending_snapshots;
-	struct list_head ordered_operations;
 	struct list_head pending_chunks;
 	struct list_head switch_commits;
 	struct btrfs_delayed_ref_root delayed_refs;
diff --git a/fs/btrfs/ulist.h b/fs/btrfs/ulist.h
index 7f78cbf..4c29db6 100644
--- a/fs/btrfs/ulist.h
+++ b/fs/btrfs/ulist.h
@@ -57,6 +57,21 @@
 int ulist_add(struct ulist *ulist, u64 val, u64 aux, gfp_t gfp_mask);
 int ulist_add_merge(struct ulist *ulist, u64 val, u64 aux,
 		    u64 *old_aux, gfp_t gfp_mask);
+
+/* just like ulist_add_merge() but takes a pointer for the aux data */
+static inline int ulist_add_merge_ptr(struct ulist *ulist, u64 val, void *aux,
+				      void **old_aux, gfp_t gfp_mask)
+{
+#if BITS_PER_LONG == 32
+	u64 old64 = (uintptr_t)*old_aux;
+	int ret = ulist_add_merge(ulist, val, (uintptr_t)aux, &old64, gfp_mask);
+	*old_aux = (void *)((uintptr_t)old64);
+	return ret;
+#else
+	return ulist_add_merge(ulist, val, (u64)aux, (u64 *)old_aux, gfp_mask);
+#endif
+}
+
 struct ulist_node *ulist_next(struct ulist *ulist,
 			      struct ulist_iterator *uiter);
 
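
ulist_add_merge_ptr() exists because on a 32-bit kernel a pointer is 4 bytes,
so casting its address to u64 * (as the old backref.c call did) would read or
write past the object. The portable round trip always goes pointer ->
uintptr_t -> u64 and back, as in this standalone sketch:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	int x = 42;
	void *p = &x;

	/* widen: pointer -> uintptr_t -> uint64_t (zero-extends on 32-bit) */
	uint64_t as_u64 = (uintptr_t)p;

	/* narrow: uint64_t -> uintptr_t -> pointer */
	void *back = (void *)(uintptr_t)as_u64;

	printf("%d\n", *(int *)back);	/* prints 42 on 32- and 64-bit */
	return 0;
}
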
diff --git a/fs/locks.c b/fs/locks.c
index a6f5480..cb66fb0 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -247,6 +247,18 @@
 }
 EXPORT_SYMBOL(locks_free_lock);
 
+static void
+locks_dispose_list(struct list_head *dispose)
+{
+	struct file_lock *fl;
+
+	while (!list_empty(dispose)) {
+		fl = list_first_entry(dispose, struct file_lock, fl_block);
+		list_del_init(&fl->fl_block);
+		locks_free_lock(fl);
+	}
+}
+
 void locks_init_lock(struct file_lock *fl)
 {
 	memset(fl, 0, sizeof(struct file_lock));
@@ -285,7 +297,8 @@
 
 void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
 {
-	locks_release_private(new);
+	/* "new" must be a freshly-initialized lock */
+	WARN_ON_ONCE(new->fl_ops);
 
 	__locks_copy_lock(new, fl);
 	new->fl_file = fl->fl_file;
@@ -650,12 +663,16 @@
  *
  * Must be called with i_lock held!
  */
-static void locks_delete_lock(struct file_lock **thisfl_p)
+static void locks_delete_lock(struct file_lock **thisfl_p,
+			      struct list_head *dispose)
 {
 	struct file_lock *fl = *thisfl_p;
 
 	locks_unlink_lock(thisfl_p);
-	locks_free_lock(fl);
+	if (dispose)
+		list_add(&fl->fl_block, dispose);
+	else
+		locks_free_lock(fl);
 }
 
 /* Determine if lock sys_fl blocks lock caller_fl. Common functionality
@@ -811,6 +828,7 @@
 	struct inode * inode = file_inode(filp);
 	int error = 0;
 	int found = 0;
+	LIST_HEAD(dispose);
 
 	if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) {
 		new_fl = locks_alloc_lock();
@@ -833,7 +851,7 @@
 		if (request->fl_type == fl->fl_type)
 			goto out;
 		found = 1;
-		locks_delete_lock(before);
+		locks_delete_lock(before, &dispose);
 		break;
 	}
 
@@ -880,6 +898,7 @@
 	spin_unlock(&inode->i_lock);
 	if (new_fl)
 		locks_free_lock(new_fl);
+	locks_dispose_list(&dispose);
 	return error;
 }
 
@@ -893,6 +912,7 @@
 	struct file_lock **before;
 	int error;
 	bool added = false;
+	LIST_HEAD(dispose);
 
 	/*
 	 * We may need two file_lock structures for this operation,
@@ -988,7 +1008,7 @@
 			else
 				request->fl_end = fl->fl_end;
 			if (added) {
-				locks_delete_lock(before);
+				locks_delete_lock(before, &dispose);
 				continue;
 			}
 			request = fl;
@@ -1018,21 +1038,24 @@
 				 * one (This may happen several times).
 				 */
 				if (added) {
-					locks_delete_lock(before);
+					locks_delete_lock(before, &dispose);
 					continue;
 				}
-				/* Replace the old lock with the new one.
-				 * Wake up anybody waiting for the old one,
-				 * as the change in lock type might satisfy
-				 * their needs.
+				/*
+				 * Replace the old lock with new_fl, and
+				 * remove the old one. It's safe to do the
+				 * insert here since we know that we won't be
+				 * using new_fl later, and that the lock is
+				 * just replacing an existing lock.
 				 */
-				locks_wake_up_blocks(fl);
-				fl->fl_start = request->fl_start;
-				fl->fl_end = request->fl_end;
-				fl->fl_type = request->fl_type;
-				locks_release_private(fl);
-				locks_copy_private(fl, request);
-				request = fl;
+				error = -ENOLCK;
+				if (!new_fl)
+					goto out;
+				locks_copy_lock(new_fl, request);
+				request = new_fl;
+				new_fl = NULL;
+				locks_delete_lock(before, &dispose);
+				locks_insert_lock(before, request);
 				added = true;
 			}
 		}
@@ -1093,6 +1116,7 @@
 		locks_free_lock(new_fl);
 	if (new_fl2)
 		locks_free_lock(new_fl2);
+	locks_dispose_list(&dispose);
 	return error;
 }
 
@@ -1268,7 +1292,7 @@
 			printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync);
 			fl->fl_fasync = NULL;
 		}
-		locks_delete_lock(before);
+		locks_delete_lock(before, NULL);
 	}
 	return 0;
 }
@@ -1737,13 +1761,10 @@
 	ret = fl;
 	spin_lock(&inode->i_lock);
 	error = __vfs_setlease(filp, arg, &ret);
-	if (error) {
-		spin_unlock(&inode->i_lock);
-		locks_free_lock(fl);
-		goto out_free_fasync;
-	}
-	if (ret != fl)
-		locks_free_lock(fl);
+	if (error)
+		goto out_unlock;
+	if (ret == fl)
+		fl = NULL;
 
 	/*
 	 * fasync_insert_entry() returns the old entry if any.
@@ -1755,9 +1776,10 @@
 		new = NULL;
 
 	error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0);
+out_unlock:
 	spin_unlock(&inode->i_lock);
-
-out_free_fasync:
+	if (fl)
+		locks_free_lock(fl);
 	if (new)
 		fasync_free(new);
 	return error;
@@ -2320,6 +2342,7 @@
 	struct inode * inode = file_inode(filp);
 	struct file_lock *fl;
 	struct file_lock **before;
+	LIST_HEAD(dispose);
 
 	if (!inode->i_flock)
 		return;
@@ -2365,12 +2388,13 @@
 				fl->fl_type, fl->fl_flags,
 				fl->fl_start, fl->fl_end);
 
-			locks_delete_lock(before);
+			locks_delete_lock(before, &dispose);
 			continue;
  		}
 		before = &fl->fl_next;
 	}
 	spin_unlock(&inode->i_lock);
+	locks_dispose_list(&dispose);
 }
 
 /**
@@ -2452,7 +2476,11 @@
 			seq_puts(f, "FLOCK  ADVISORY  ");
 		}
 	} else if (IS_LEASE(fl)) {
-		seq_puts(f, "LEASE  ");
+		if (fl->fl_flags & FL_DELEG)
+			seq_puts(f, "DELEG  ");
+		else
+			seq_puts(f, "LEASE  ");
+
 		if (lease_breaking(fl))
 			seq_puts(f, "BREAKING  ");
 		else if (fl->fl_file)
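
The locks_dispose_list()/locks_delete_lock() change above is the standard
deferred-free pattern: while i_lock (a spinlock) is held the code may not
block, so doomed locks are only unlinked onto a caller-provided list, and the
actual frees happen after the unlock. A userspace analogue with a pthread
spinlock standing in for i_lock (names are illustrative):

#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

struct lk {
	struct lk *next;
	int owner;
};

static pthread_spinlock_t i_lock;
static struct lk *inode_locks;		/* protected by i_lock */

static void remove_owner(int owner)
{
	struct lk **pp, *fl, *dispose = NULL;

	pthread_spin_lock(&i_lock);
	for (pp = &inode_locks; (fl = *pp) != NULL; ) {
		if (fl->owner == owner) {
			*pp = fl->next;		/* unlink under the lock */
			fl->next = dispose;	/* park on the dispose list */
			dispose = fl;
		} else {
			pp = &fl->next;
		}
	}
	pthread_spin_unlock(&i_lock);

	while ((fl = dispose) != NULL) {	/* free() may block: do it here */
		dispose = fl->next;
		free(fl);
	}
}

int main(void)
{
	pthread_spin_init(&i_lock, PTHREAD_PROCESS_PRIVATE);
	for (int i = 0; i < 4; i++) {		/* single-threaded setup */
		struct lk *fl = malloc(sizeof(*fl));
		fl->owner = i % 2;
		fl->next = inode_locks;
		inode_locks = fl;
	}
	remove_owner(0);
	printf("dropped owner 0's locks\n");
	return 0;
}
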
diff --git a/kernel/module.c b/kernel/module.c
index 6f69463..03214bd2 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -3304,6 +3304,11 @@
 	mutex_lock(&module_mutex);
 	module_bug_cleanup(mod);
 	mutex_unlock(&module_mutex);
+
+	/* we can't deallocate the module until we clear memory protection */
+	unset_module_init_ro_nx(mod);
+	unset_module_core_ro_nx(mod);
+
  ddebug_cleanup:
 	dynamic_debug_remove(info->debug);
 	synchronize_sched();
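
The module.c hunk restores normal page protections (clearing RO/NX) before
the module's memory is handed back, since otherwise freed pages would still
be mapped read-only and crash their next user. A loose userspace analogue of
the same ordering requirement, using mprotect() (a sketch, not kernel code):

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	size_t len = (size_t)sysconf(_SC_PAGESIZE);
	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (p == MAP_FAILED)
		return 1;

	strcpy(p, "module text");
	mprotect(p, len, PROT_READ);		/* like set_memory_ro() */

	/* undo the protection before the memory is recycled ... */
	mprotect(p, len, PROT_READ | PROT_WRITE);
	memset(p, 0, len);			/* ... or this write would fault */

	munmap(p, len);
	printf("ok\n");
	return 0;
}
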
diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c
index d0396af..5b1b807 100644
--- a/tools/power/x86/turbostat/turbostat.c
+++ b/tools/power/x86/turbostat/turbostat.c
@@ -267,90 +267,90 @@
 /*
  * Example Format w/ field column widths:
  *
- * Package    Core     CPU Avg_MHz Bzy_MHz TSC_MHz     SMI   %Busy CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 CoreTmp  PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt
- * 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567 1234567
+ *  Package    Core     CPU Avg_MHz Bzy_MHz TSC_MHz     SMI   %Busy CPU_%c1 CPU_%c3 CPU_%c6 CPU_%c7 CoreTmp  PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt
+ * 123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678123456781234567812345678
  */
 
 void print_header(void)
 {
 	if (show_pkg)
-		outp += sprintf(outp, "Package ");
+		outp += sprintf(outp, " Package");
 	if (show_core)
-		outp += sprintf(outp, "    Core ");
+		outp += sprintf(outp, "    Core");
 	if (show_cpu)
-		outp += sprintf(outp, "    CPU ");
+		outp += sprintf(outp, "     CPU");
 	if (has_aperf)
-		outp += sprintf(outp, "Avg_MHz ");
+		outp += sprintf(outp, " Avg_MHz");
 	if (do_nhm_cstates)
-		outp += sprintf(outp, "  %%Busy ");
+		outp += sprintf(outp, "   %%Busy");
 	if (has_aperf)
-		outp += sprintf(outp, "Bzy_MHz ");
-	outp += sprintf(outp, "TSC_MHz ");
+		outp += sprintf(outp, " Bzy_MHz");
+	outp += sprintf(outp, " TSC_MHz");
 	if (do_smi)
-		outp += sprintf(outp, "    SMI ");
+		outp += sprintf(outp, "     SMI");
 	if (extra_delta_offset32)
-		outp += sprintf(outp, " count 0x%03X ", extra_delta_offset32);
+		outp += sprintf(outp, "  count 0x%03X", extra_delta_offset32);
 	if (extra_delta_offset64)
-		outp += sprintf(outp, " COUNT 0x%03X ", extra_delta_offset64);
+		outp += sprintf(outp, "  COUNT 0x%03X", extra_delta_offset64);
 	if (extra_msr_offset32)
-		outp += sprintf(outp, "  MSR 0x%03X ", extra_msr_offset32);
+		outp += sprintf(outp, "   MSR 0x%03X", extra_msr_offset32);
 	if (extra_msr_offset64)
-		outp += sprintf(outp, "          MSR 0x%03X ", extra_msr_offset64);
+		outp += sprintf(outp, "           MSR 0x%03X", extra_msr_offset64);
 	if (do_nhm_cstates)
-		outp += sprintf(outp, " CPU%%c1 ");
+		outp += sprintf(outp, "  CPU%%c1");
 	if (do_nhm_cstates && !do_slm_cstates)
-		outp += sprintf(outp, " CPU%%c3 ");
+		outp += sprintf(outp, "  CPU%%c3");
 	if (do_nhm_cstates)
-		outp += sprintf(outp, " CPU%%c6 ");
+		outp += sprintf(outp, "  CPU%%c6");
 	if (do_snb_cstates)
-		outp += sprintf(outp, " CPU%%c7 ");
+		outp += sprintf(outp, "  CPU%%c7");
 
 	if (do_dts)
-		outp += sprintf(outp, "CoreTmp ");
+		outp += sprintf(outp, " CoreTmp");
 	if (do_ptm)
-		outp += sprintf(outp, " PkgTmp ");
+		outp += sprintf(outp, "  PkgTmp");
 
 	if (do_snb_cstates)
-		outp += sprintf(outp, "Pkg%%pc2 ");
+		outp += sprintf(outp, " Pkg%%pc2");
 	if (do_nhm_cstates && !do_slm_cstates)
-		outp += sprintf(outp, "Pkg%%pc3 ");
+		outp += sprintf(outp, " Pkg%%pc3");
 	if (do_nhm_cstates && !do_slm_cstates)
-		outp += sprintf(outp, "Pkg%%pc6 ");
+		outp += sprintf(outp, " Pkg%%pc6");
 	if (do_snb_cstates)
-		outp += sprintf(outp, "Pkg%%pc7 ");
+		outp += sprintf(outp, " Pkg%%pc7");
 	if (do_c8_c9_c10) {
-		outp += sprintf(outp, "Pkg%%pc8 ");
-		outp += sprintf(outp, "Pkg%%pc9 ");
-		outp += sprintf(outp, "Pk%%pc10 ");
+		outp += sprintf(outp, " Pkg%%pc8");
+		outp += sprintf(outp, " Pkg%%pc9");
+		outp += sprintf(outp, " Pk%%pc10");
 	}
 
 	if (do_rapl && !rapl_joules) {
 		if (do_rapl & RAPL_PKG)
-			outp += sprintf(outp, "PkgWatt ");
+			outp += sprintf(outp, " PkgWatt");
 		if (do_rapl & RAPL_CORES)
-			outp += sprintf(outp, "CorWatt ");
+			outp += sprintf(outp, " CorWatt");
 		if (do_rapl & RAPL_GFX)
-			outp += sprintf(outp, "GFXWatt ");
+			outp += sprintf(outp, " GFXWatt");
 		if (do_rapl & RAPL_DRAM)
-			outp += sprintf(outp, "RAMWatt ");
+			outp += sprintf(outp, " RAMWatt");
 		if (do_rapl & RAPL_PKG_PERF_STATUS)
-			outp += sprintf(outp, "  PKG_%% ");
+			outp += sprintf(outp, "   PKG_%%");
 		if (do_rapl & RAPL_DRAM_PERF_STATUS)
-			outp += sprintf(outp, "  RAM_%% ");
+			outp += sprintf(outp, "   RAM_%%");
 	} else {
 		if (do_rapl & RAPL_PKG)
-			outp += sprintf(outp, "  Pkg_J ");
+			outp += sprintf(outp, "   Pkg_J");
 		if (do_rapl & RAPL_CORES)
-			outp += sprintf(outp, "  Cor_J ");
+			outp += sprintf(outp, "   Cor_J");
 		if (do_rapl & RAPL_GFX)
-			outp += sprintf(outp, "  GFX_J ");
+			outp += sprintf(outp, "   GFX_J");
 		if (do_rapl & RAPL_DRAM)
-			outp += sprintf(outp, "  RAM_W ");
+			outp += sprintf(outp, "   RAM_W");
 		if (do_rapl & RAPL_PKG_PERF_STATUS)
-			outp += sprintf(outp, "  PKG_%% ");
+			outp += sprintf(outp, "   PKG_%%");
 		if (do_rapl & RAPL_DRAM_PERF_STATUS)
-			outp += sprintf(outp, "  RAM_%% ");
-		outp += sprintf(outp, "  time ");
+			outp += sprintf(outp, "   RAM_%%");
+		outp += sprintf(outp, "   time");
 
 	}
 	outp += sprintf(outp, "\n");
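
The turbostat rework moves the padding space from the end of each header to
the front and pads every name to the same eight-character cell, so headers
and %8-style data fields line up column for column. The convention in
miniature:

#include <stdio.h>

int main(void)
{
	/* every header cell is exactly 8 chars, padded on the left */
	printf("%8s%8s%8s\n", "Core", "CPU", "Avg_MHz");
	printf("%8d%8d%8d\n", 0, 3, 1874);
	return 0;
}
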
diff --git a/virt/kvm/assigned-dev.c b/virt/kvm/assigned-dev.c
index bf06577..5819a27 100644
--- a/virt/kvm/assigned-dev.c
+++ b/virt/kvm/assigned-dev.c
@@ -526,8 +526,10 @@
 		dev->irq_requested_type |= guest_irq_type;
 		if (dev->ack_notifier.gsi != -1)
 			kvm_register_irq_ack_notifier(kvm, &dev->ack_notifier);
-	} else
+	} else {
 		kvm_free_irq_source_id(kvm, dev->irq_source_id);
+		dev->irq_source_id = -1;
+	}
 
 	return r;
 }
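
The assigned-dev fix resets irq_source_id to -1 once the id has been freed,
so later teardown paths see a sentinel rather than a stale value and cannot
release it twice. The generic shape of that pattern, transposed to a heap
allocation with illustrative types:

#include <stdio.h>
#include <stdlib.h>

struct dev {
	int *irq_source_id;	/* NULL is the "not held" sentinel */
};

static void release_irq_source(struct dev *d)
{
	if (!d->irq_source_id)
		return;			/* sentinel: nothing to release */
	free(d->irq_source_id);
	d->irq_source_id = NULL;	/* the step the fix adds */
}

int main(void)
{
	struct dev d = { .irq_source_id = malloc(sizeof(int)) };

	release_irq_source(&d);
	release_irq_source(&d);		/* safe second call, no double free */
	printf("ok\n");
	return 0;
}
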
diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index 0df7d4b..714b949 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -61,6 +61,14 @@
 	return pfn;
 }
 
+static void kvm_unpin_pages(struct kvm *kvm, pfn_t pfn, unsigned long npages)
+{
+	unsigned long i;
+
+	for (i = 0; i < npages; ++i)
+		kvm_release_pfn_clean(pfn + i);
+}
+
 int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot)
 {
 	gfn_t gfn, end_gfn;
@@ -123,6 +131,7 @@
 		if (r) {
 			printk(KERN_ERR "kvm_iommu_map_address:"
 			       "iommu failed to map pfn=%llx\n", pfn);
+			kvm_unpin_pages(kvm, pfn, page_size);
 			goto unmap_pages;
 		}
 
@@ -134,7 +143,7 @@
 	return 0;
 
 unmap_pages:
-	kvm_iommu_put_pages(kvm, slot->base_gfn, gfn);
+	kvm_iommu_put_pages(kvm, slot->base_gfn, gfn - slot->base_gfn);
 	return r;
 }
 
@@ -266,14 +275,6 @@
 	return r;
 }
 
-static void kvm_unpin_pages(struct kvm *kvm, pfn_t pfn, unsigned long npages)
-{
-	unsigned long i;
-
-	for (i = 0; i < npages; ++i)
-		kvm_release_pfn_clean(pfn + i);
-}
-
 static void kvm_iommu_put_pages(struct kvm *kvm,
 				gfn_t base_gfn, unsigned long npages)
 {