Merge branch 'sh/clkfwk' into sh-latest
diff --git a/Documentation/power/devices.txt b/Documentation/power/devices.txt
index 8888083..64565aa 100644
--- a/Documentation/power/devices.txt
+++ b/Documentation/power/devices.txt
@@ -520,59 +520,20 @@
 device.  This field is a pointer to an object of type struct dev_power_domain,
 defined in include/linux/pm.h, providing a set of power management callbacks
 analogous to the subsystem-level and device driver callbacks that are executed
-for the given device during all power transitions, in addition to the respective
-subsystem-level callbacks.  Specifically, the power domain "suspend" callbacks
-(i.e. ->runtime_suspend(), ->suspend(), ->freeze(), ->poweroff(), etc.) are
-executed after the analogous subsystem-level callbacks, while the power domain
-"resume" callbacks (i.e. ->runtime_resume(), ->resume(), ->thaw(), ->restore,
-etc.) are executed before the analogous subsystem-level callbacks.  Error codes
-returned by the "suspend" and "resume" power domain callbacks are ignored.
+for the given device during all power transitions, instead of the respective
+subsystem-level callbacks.  Specifically, if a device's pm_domain pointer is
+not NULL, the ->suspend() callback from the object pointed to by it will be
+executed instead of its subsystem's (e.g. bus type's) ->suspend() callback and
+analogously for all of the remaining callbacks.  In other words, power management
+domain callbacks, if defined for the given device, always take precedence over
+the callbacks provided by the device's subsystem (e.g. bus type).
 
-Power domain ->runtime_idle() callback is executed before the subsystem-level
-->runtime_idle() callback and the result returned by it is not ignored.  Namely,
-if it returns error code, the subsystem-level ->runtime_idle() callback will not
-be called and the helper function rpm_idle() executing it will return error
-code.  This mechanism is intended to help platforms where saving device state
-is a time consuming operation and should only be carried out if all devices
-in the power domain are idle, before turning off the shared power resource(s).
-Namely, the power domain ->runtime_idle() callback may return error code until
-the pm_runtime_idle() helper (or its asychronous version) has been called for
-all devices in the power domain (it is recommended that the returned error code
-be -EBUSY in those cases), preventing the subsystem-level ->runtime_idle()
-callback from being run prematurely.
-
-The support for device power domains is only relevant to platforms needing to
-use the same subsystem-level (e.g. platform bus type) and device driver power
-management callbacks in many different power domain configurations and wanting
-to avoid incorporating the support for power domains into the subsystem-level
-callbacks.  The other platforms need not implement it or take it into account
-in any way.
-
-
-System Devices
---------------
-System devices (sysdevs) follow a slightly different API, which can be found in
-
-	include/linux/sysdev.h
-	drivers/base/sys.c
-
-System devices will be suspended with interrupts disabled, and after all other
-devices have been suspended.  On resume, they will be resumed before any other
-devices, and also with interrupts disabled.  These things occur in special
-"sysdev_driver" phases, which affect only system devices.
-
-Thus, after the suspend_noirq (or freeze_noirq or poweroff_noirq) phase, when
-the non-boot CPUs are all offline and IRQs are disabled on the remaining online
-CPU, then a sysdev_driver.suspend phase is carried out, and the system enters a
-sleep state (or a system image is created).  During resume (or after the image
-has been created or loaded) a sysdev_driver.resume phase is carried out, IRQs
-are enabled on the only online CPU, the non-boot CPUs are enabled, and the
-resume_noirq (or thaw_noirq or restore_noirq) phase begins.
-
-Code to actually enter and exit the system-wide low power state sometimes
-involves hardware details that are only known to the boot firmware, and
-may leave a CPU running software (from SRAM or flash memory) that monitors
-the system and manages its wakeup sequence.
+The support for device power management domains is only relevant to platforms
+needing to use the same device driver power management callbacks in many
+different power domain configurations and wanting to avoid incorporating the
+support for power domains into subsystem-level callbacks, for example by
+modifying the platform bus type.  Other platforms need not implement it or take
+it into account in any way.
 
 
 Device Low Power (suspend) States
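
To illustrate the precedence rule described above, the suspend path in
drivers/base/power/main.c now dispatches roughly as follows (a simplified
sketch, not the verbatim kernel code; the field is named pwr_domain in the
code at this point, and pm_op() is the internal helper that invokes the
matching dev_pm_ops callback):

	if (dev->pwr_domain) {
		/* Power domain callbacks win; the subsystem's are skipped. */
		pm_dev_dbg(dev, state, "power domain ");
		error = pm_op(dev, &dev->pwr_domain->ops, state);
	} else if (dev->bus && dev->bus->pm) {
		/* No domain: fall back to the subsystem-level callbacks. */
		pm_dev_dbg(dev, state, "");
		error = pm_op(dev, dev->bus->pm, state);
	}
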
diff --git a/Documentation/power/runtime_pm.txt b/Documentation/power/runtime_pm.txt
index 654097b..22accb3 100644
--- a/Documentation/power/runtime_pm.txt
+++ b/Documentation/power/runtime_pm.txt
@@ -566,11 +566,6 @@
 	pm_runtime_set_active(dev);
 	pm_runtime_enable(dev);
 
-The PM core always increments the run-time usage counter before calling the
-->prepare() callback and decrements it after calling the ->complete() callback.
-Hence disabling run-time PM temporarily like this will not cause any run-time
-suspend callbacks to be lost.
-
 7. Generic subsystem callbacks
 
 Subsystems may wish to conserve code space by using the set of generic power
diff --git a/arch/mn10300/include/asm/uaccess.h b/arch/mn10300/include/asm/uaccess.h
index 3d6e60d..780560b 100644
--- a/arch/mn10300/include/asm/uaccess.h
+++ b/arch/mn10300/include/asm/uaccess.h
@@ -15,6 +15,7 @@
  * User space memory access functions
  */
 #include <linux/thread_info.h>
+#include <linux/kernel.h>
 #include <asm/page.h>
 #include <asm/errno.h>
 
diff --git a/drivers/base/power/clock_ops.c b/drivers/base/power/clock_ops.c
index eaa8a85..ad367c4 100644
--- a/drivers/base/power/clock_ops.c
+++ b/drivers/base/power/clock_ops.c
@@ -387,7 +387,7 @@
 	clknb = container_of(nb, struct pm_clk_notifier_block, nb);
 
 	switch (action) {
-	case BUS_NOTIFY_ADD_DEVICE:
+	case BUS_NOTIFY_BIND_DRIVER:
 		if (clknb->con_ids[0]) {
 			for (con_id = clknb->con_ids; *con_id; con_id++)
 				enable_clock(dev, *con_id);
@@ -395,7 +395,7 @@
 			enable_clock(dev, NULL);
 		}
 		break;
-	case BUS_NOTIFY_DEL_DEVICE:
+	case BUS_NOTIFY_UNBOUND_DRIVER:
 		if (clknb->con_ids[0]) {
 			for (con_id = clknb->con_ids; *con_id; con_id++)
 				disable_clock(dev, *con_id);
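
Switching from BUS_NOTIFY_ADD_DEVICE/BUS_NOTIFY_DEL_DEVICE to
BUS_NOTIFY_BIND_DRIVER/BUS_NOTIFY_UNBOUND_DRIVER ties the clock
enable/disable pair to the driver-binding lifetime rather than device
registration, so a registered device with no bound driver no longer keeps
its clocks running.
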
diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c
index aa632020..06f09bf 100644
--- a/drivers/base/power/main.c
+++ b/drivers/base/power/main.c
@@ -57,7 +57,8 @@
  */
 void device_pm_init(struct device *dev)
 {
-	dev->power.in_suspend = false;
+	dev->power.is_prepared = false;
+	dev->power.is_suspended = false;
 	init_completion(&dev->power.completion);
 	complete_all(&dev->power.completion);
 	dev->power.wakeup = NULL;
@@ -91,7 +92,7 @@
 	pr_debug("PM: Adding info for %s:%s\n",
 		 dev->bus ? dev->bus->name : "No Bus", dev_name(dev));
 	mutex_lock(&dpm_list_mtx);
-	if (dev->parent && dev->parent->power.in_suspend)
+	if (dev->parent && dev->parent->power.is_prepared)
 		dev_warn(dev, "parent %s should not be sleeping\n",
 			dev_name(dev->parent));
 	list_add_tail(&dev->power.entry, &dpm_list);
@@ -511,7 +512,14 @@
 	dpm_wait(dev->parent, async);
 	device_lock(dev);
 
-	dev->power.in_suspend = false;
+	/*
+	 * This is a fib.  But we'll allow new children to be added below
+	 * a resumed device, even if the device hasn't been completed yet.
+	 */
+	dev->power.is_prepared = false;
+
+	if (!dev->power.is_suspended)
+		goto Unlock;
 
 	if (dev->pwr_domain) {
 		pm_dev_dbg(dev, state, "power domain ");
@@ -548,6 +556,9 @@
 	}
 
  End:
+	dev->power.is_suspended = false;
+
+ Unlock:
 	device_unlock(dev);
 	complete_all(&dev->power.completion);
 
@@ -670,7 +681,7 @@
 		struct device *dev = to_device(dpm_prepared_list.prev);
 
 		get_device(dev);
-		dev->power.in_suspend = false;
+		dev->power.is_prepared = false;
 		list_move(&dev->power.entry, &list);
 		mutex_unlock(&dpm_list_mtx);
 
@@ -835,11 +846,11 @@
 	device_lock(dev);
 
 	if (async_error)
-		goto End;
+		goto Unlock;
 
 	if (pm_wakeup_pending()) {
 		async_error = -EBUSY;
-		goto End;
+		goto Unlock;
 	}
 
 	if (dev->pwr_domain) {
@@ -877,6 +888,9 @@
 	}
 
  End:
+	dev->power.is_suspended = !error;
+
+ Unlock:
 	device_unlock(dev);
 	complete_all(&dev->power.completion);
 
@@ -1042,7 +1056,7 @@
 			put_device(dev);
 			break;
 		}
-		dev->power.in_suspend = true;
+		dev->power.is_prepared = true;
 		if (!list_empty(&dev->power.entry))
 			list_move_tail(&dev->power.entry, &dpm_prepared_list);
 		put_device(dev);
diff --git a/drivers/infiniband/hw/cxgb4/cm.c b/drivers/infiniband/hw/cxgb4/cm.c
index f660cd0..31fb440 100644
--- a/drivers/infiniband/hw/cxgb4/cm.c
+++ b/drivers/infiniband/hw/cxgb4/cm.c
@@ -1463,9 +1463,9 @@
 	struct c4iw_qp_attributes attrs;
 	int disconnect = 1;
 	int release = 0;
-	int abort = 0;
 	struct tid_info *t = dev->rdev.lldi.tids;
 	unsigned int tid = GET_TID(hdr);
+	int ret;
 
 	ep = lookup_tid(t, tid);
 	PDBG("%s ep %p tid %u\n", __func__, ep, ep->hwtid);
@@ -1501,10 +1501,12 @@
 		start_ep_timer(ep);
 		__state_set(&ep->com, CLOSING);
 		attrs.next_state = C4IW_QP_STATE_CLOSING;
-		abort = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
+		ret = c4iw_modify_qp(ep->com.qp->rhp, ep->com.qp,
 				       C4IW_QP_ATTR_NEXT_STATE, &attrs, 1);
-		peer_close_upcall(ep);
-		disconnect = 1;
+		if (ret != -ECONNRESET) {
+			peer_close_upcall(ep);
+			disconnect = 1;
+		}
 		break;
 	case ABORTING:
 		disconnect = 0;
@@ -2109,15 +2111,16 @@
 		break;
 	}
 
-	mutex_unlock(&ep->com.mutex);
 	if (close) {
-		if (abrupt)
-			ret = abort_connection(ep, NULL, gfp);
-		else
+		if (abrupt) {
+			close_complete_upcall(ep);
+			ret = send_abort(ep, NULL, gfp);
+		} else
 			ret = send_halfclose(ep, gfp);
 		if (ret)
 			fatal = 1;
 	}
+	mutex_unlock(&ep->com.mutex);
 	if (fatal)
 		release_ep_resources(ep);
 	return ret;
@@ -2301,6 +2304,31 @@
 	return 0;
 }
 
+static int peer_abort_intr(struct c4iw_dev *dev, struct sk_buff *skb)
+{
+	struct cpl_abort_req_rss *req = cplhdr(skb);
+	struct c4iw_ep *ep;
+	struct tid_info *t = dev->rdev.lldi.tids;
+	unsigned int tid = GET_TID(req);
+
+	ep = lookup_tid(t, tid);
+	if (is_neg_adv_abort(req->status)) {
+		PDBG("%s neg_adv_abort ep %p tid %u\n", __func__, ep,
+		     ep->hwtid);
+		kfree_skb(skb);
+		return 0;
+	}
+	PDBG("%s ep %p tid %u state %u\n", __func__, ep, ep->hwtid,
+	     ep->com.state);
+
+	/*
+	 * Wake up any threads in rdma_init() or rdma_fini().
+	 */
+	c4iw_wake_up(&ep->com.wr_wait, -ECONNRESET);
+	sched(dev, skb);
+	return 0;
+}
+
 /*
  * Most upcalls from the T4 Core go to sched() to
  * schedule the processing on a work queue.
@@ -2317,7 +2345,7 @@
 	[CPL_PASS_ESTABLISH] = sched,
 	[CPL_PEER_CLOSE] = sched,
 	[CPL_CLOSE_CON_RPL] = sched,
-	[CPL_ABORT_REQ_RSS] = sched,
+	[CPL_ABORT_REQ_RSS] = peer_abort_intr,
 	[CPL_RDMA_TERMINATE] = sched,
 	[CPL_FW4_ACK] = sched,
 	[CPL_SET_TCB_RPL] = set_tcb_rpl,
diff --git a/drivers/infiniband/hw/cxgb4/cq.c b/drivers/infiniband/hw/cxgb4/cq.c
index 8d8f8ad..1720dc7 100644
--- a/drivers/infiniband/hw/cxgb4/cq.c
+++ b/drivers/infiniband/hw/cxgb4/cq.c
@@ -801,6 +801,10 @@
 	if (ucontext) {
 		memsize = roundup(memsize, PAGE_SIZE);
 		hwentries = memsize / sizeof *chp->cq.queue;
+		while (hwentries > T4_MAX_IQ_SIZE) {
+			memsize -= PAGE_SIZE;
+			hwentries = memsize / sizeof *chp->cq.queue;
+		}
 	}
 	chp->cq.size = hwentries;
 	chp->cq.memsize = memsize;
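
The clamp loop converges because each iteration removes exactly one page's
worth of entries.  As a hypothetical worked example, with 4 KiB pages and
64-byte CQ entries each iteration drops 4096 / 64 = 64 entries, so hwentries
shrinks steadily until it no longer exceeds T4_MAX_IQ_SIZE.
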
diff --git a/drivers/infiniband/hw/cxgb4/mem.c b/drivers/infiniband/hw/cxgb4/mem.c
index 273ffe4..0347eed 100644
--- a/drivers/infiniband/hw/cxgb4/mem.c
+++ b/drivers/infiniband/hw/cxgb4/mem.c
@@ -625,7 +625,7 @@
 	mhp->attr.perms = c4iw_ib_to_tpt_access(acc);
 	mhp->attr.va_fbo = virt;
 	mhp->attr.page_size = shift - 12;
-	mhp->attr.len = (u32) length;
+	mhp->attr.len = length;
 
 	err = register_mem(rhp, php, mhp, shift);
 	if (err)
diff --git a/drivers/infiniband/hw/cxgb4/qp.c b/drivers/infiniband/hw/cxgb4/qp.c
index 3b773b0..a41578e 100644
--- a/drivers/infiniband/hw/cxgb4/qp.c
+++ b/drivers/infiniband/hw/cxgb4/qp.c
@@ -1207,11 +1207,8 @@
 				c4iw_get_ep(&qhp->ep->com);
 			}
 			ret = rdma_fini(rhp, qhp, ep);
-			if (ret) {
-				if (internal)
-					c4iw_get_ep(&qhp->ep->com);
+			if (ret)
 				goto err;
-			}
 			break;
 		case C4IW_QP_STATE_TERMINATE:
 			set_state(qhp, C4IW_QP_STATE_TERMINATE);
diff --git a/drivers/infiniband/hw/qib/qib_iba7322.c b/drivers/infiniband/hw/qib/qib_iba7322.c
index 9f53e68..8ec5237 100644
--- a/drivers/infiniband/hw/qib/qib_iba7322.c
+++ b/drivers/infiniband/hw/qib/qib_iba7322.c
@@ -469,6 +469,8 @@
 #define IB_7322_LT_STATE_RECOVERIDLE     0x0f
 #define IB_7322_LT_STATE_CFGENH          0x10
 #define IB_7322_LT_STATE_CFGTEST         0x11
+#define IB_7322_LT_STATE_CFGWAITRMTTEST  0x12
+#define IB_7322_LT_STATE_CFGWAITENH      0x13
 
 /* link state machine states from IBC */
 #define IB_7322_L_STATE_DOWN             0x0
@@ -498,8 +500,10 @@
 		IB_PHYSPORTSTATE_LINK_ERR_RECOVER,
 	[IB_7322_LT_STATE_CFGENH] = IB_PHYSPORTSTATE_CFG_ENH,
 	[IB_7322_LT_STATE_CFGTEST] = IB_PHYSPORTSTATE_CFG_TRAIN,
-	[0x12] = IB_PHYSPORTSTATE_CFG_TRAIN,
-	[0x13] = IB_PHYSPORTSTATE_CFG_WAIT_ENH,
+	[IB_7322_LT_STATE_CFGWAITRMTTEST] =
+		IB_PHYSPORTSTATE_CFG_TRAIN,
+	[IB_7322_LT_STATE_CFGWAITENH] =
+		IB_PHYSPORTSTATE_CFG_WAIT_ENH,
 	[0x14] = IB_PHYSPORTSTATE_CFG_TRAIN,
 	[0x15] = IB_PHYSPORTSTATE_CFG_TRAIN,
 	[0x16] = IB_PHYSPORTSTATE_CFG_TRAIN,
@@ -1692,7 +1696,9 @@
 		break;
 	}
 
-	if (ibclt == IB_7322_LT_STATE_CFGTEST &&
+	if (((ibclt >= IB_7322_LT_STATE_CFGTEST &&
+	      ibclt <= IB_7322_LT_STATE_CFGWAITENH) ||
+	     ibclt == IB_7322_LT_STATE_LINKUP) &&
 	    (ibcst & SYM_MASK(IBCStatusA_0, LinkSpeedQDR))) {
 		force_h1(ppd);
 		ppd->cpspec->qdr_reforce = 1;
@@ -7301,12 +7307,17 @@
 static void serdes_7322_los_enable(struct qib_pportdata *ppd, int enable)
 {
 	u64 data = qib_read_kreg_port(ppd, krp_serdesctrl);
-	printk(KERN_INFO QIB_DRV_NAME " IB%u:%u Turning LOS %s\n",
-		ppd->dd->unit, ppd->port, (enable ? "on" : "off"));
-	if (enable)
+	u8 state = SYM_FIELD(data, IBSerdesCtrl_0, RXLOSEN);
+
+	if (enable && !state) {
+		printk(KERN_INFO QIB_DRV_NAME " IB%u:%u Turning LOS on\n",
+			ppd->dd->unit, ppd->port);
 		data |= SYM_MASK(IBSerdesCtrl_0, RXLOSEN);
-	else
+	} else if (!enable && state) {
+		printk(KERN_INFO QIB_DRV_NAME " IB%u:%u Turning LOS off\n",
+			ppd->dd->unit, ppd->port);
 		data &= ~SYM_MASK(IBSerdesCtrl_0, RXLOSEN);
+	}
 	qib_write_kreg_port(ppd, krp_serdesctrl, data);
 }
 
diff --git a/drivers/infiniband/hw/qib/qib_intr.c b/drivers/infiniband/hw/qib/qib_intr.c
index a693c56..6ae57d2 100644
--- a/drivers/infiniband/hw/qib/qib_intr.c
+++ b/drivers/infiniband/hw/qib/qib_intr.c
@@ -96,8 +96,12 @@
 	 * states, or if it transitions from any of the up (INIT or better)
 	 * states into any of the down states (except link recovery), then
 	 * call the chip-specific code to take appropriate actions.
+	 *
+	 * ppd->lflags could be 0 if this is the first time the interrupt
+	 * handler has been called but the link is already up.

 	 */
-	if (lstate >= IB_PORT_INIT && (ppd->lflags & QIBL_LINKDOWN) &&
+	if (lstate >= IB_PORT_INIT &&
+	    (!ppd->lflags || (ppd->lflags & QIBL_LINKDOWN)) &&
 	    ltstate == IB_PHYSPORTSTATE_LINKUP) {
 		/* transitioned to UP */
 		if (dd->f_ib_updown(ppd, 1, ibcs))
diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c
index 135df16..46767c5 100644
--- a/drivers/pci/pci-driver.c
+++ b/drivers/pci/pci-driver.c
@@ -624,7 +624,7 @@
 	 * system from the sleep state, we'll have to prevent it from signaling
 	 * wake-up.
 	 */
-	pm_runtime_resume(dev);
+	pm_runtime_get_sync(dev);
 
 	if (drv && drv->pm && drv->pm->prepare)
 		error = drv->pm->prepare(dev);
@@ -638,6 +638,8 @@
 
 	if (drv && drv->pm && drv->pm->complete)
 		drv->pm->complete(dev);
+
+	pm_runtime_put_sync(dev);
 }
 
 #else /* !CONFIG_PM_SLEEP */
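
The get/put pair replaces the bare pm_runtime_resume() so that the device is
not only resumed but also pinned for the whole transition; otherwise it could
runtime-suspend again between ->prepare() and ->complete() and then signal
the wake-up the comment above warns about.  A minimal sketch of the pairing
(function names are hypothetical, not part of the patch):

	static int foo_prepare(struct device *dev)
	{
		pm_runtime_get_sync(dev);	/* resume and hold a reference */
		return 0;
	}

	static void foo_complete(struct device *dev)
	{
		pm_runtime_put_sync(dev);	/* drop the reference from prepare */
	}
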
diff --git a/drivers/sh/clk/core.c b/drivers/sh/clk/core.c
index 7e9c399..ebeaa9e 100644
--- a/drivers/sh/clk/core.c
+++ b/drivers/sh/clk/core.c
@@ -34,6 +34,9 @@
 static DEFINE_SPINLOCK(clock_lock);
 static DEFINE_MUTEX(clock_list_sem);
 
+/* clock disable operations are not passed on to hardware during boot */
+static int allow_disable;
+
 void clk_rate_table_build(struct clk *clk,
 			  struct cpufreq_frequency_table *freq_table,
 			  int nr_freqs,
@@ -228,7 +231,7 @@
 		return;
 
 	if (!(--clk->usecount)) {
-		if (likely(clk->ops && clk->ops->disable))
+		if (likely(allow_disable && clk->ops && clk->ops->disable))
 			clk->ops->disable(clk);
 		if (likely(clk->parent))
 			__clk_disable(clk->parent);
@@ -747,3 +750,25 @@
 	return err;
 }
 late_initcall(clk_debugfs_init);
+
+static int __init clk_late_init(void)
+{
+	unsigned long flags;
+	struct clk *clk;
+
+	/* disable all clocks with zero use count */
+	mutex_lock(&clock_list_sem);
+	spin_lock_irqsave(&clock_lock, flags);
+
+	list_for_each_entry(clk, &clock_list, node)
+		if (!clk->usecount && clk->ops && clk->ops->disable)
+			clk->ops->disable(clk);
+
+	/* from now on allow clock disable operations */
+	allow_disable = 1;
+
+	spin_unlock_irqrestore(&clock_lock, flags);
+	mutex_unlock(&clock_list_sem);
+	return 0;
+}
+late_initcall(clk_late_init);
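
With allow_disable initially zero, clk_disable() during boot only drops the
use count without gating the hardware; unused clocks are then swept once at
clk_late_init().  A hypothetical early driver therefore behaves like this
(the con_id "peripheral" is made up for the sketch):

	struct clk *clk = clk_get(dev, "peripheral");

	clk_enable(clk);	/* usecount 0 -> 1, ->enable() reaches hardware */
	clk_disable(clk);	/* usecount 1 -> 0, but the hardware gate stays
				 * open until clk_late_init() disables all
				 * clocks still at a zero use count */
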
diff --git a/drivers/usb/core/driver.c b/drivers/usb/core/driver.c
index e35a176..aa3cc46 100644
--- a/drivers/usb/core/driver.c
+++ b/drivers/usb/core/driver.c
@@ -375,7 +375,7 @@
 		 * Just re-enable it without affecting the endpoint toggles.
 		 */
 		usb_enable_interface(udev, intf, false);
-	} else if (!error && !intf->dev.power.in_suspend) {
+	} else if (!error && !intf->dev.power.is_prepared) {
 		r = usb_set_interface(udev, intf->altsetting[0].
 				desc.bInterfaceNumber, 0);
 		if (r < 0)
@@ -960,7 +960,7 @@
 	}
 
 	/* Try to rebind the interface */
-	if (!intf->dev.power.in_suspend) {
+	if (!intf->dev.power.is_prepared) {
 		intf->needs_binding = 0;
 		rc = device_attach(&intf->dev);
 		if (rc < 0)
@@ -1107,7 +1107,7 @@
 	if (intf->condition == USB_INTERFACE_UNBOUND) {
 
 		/* Carry out a deferred switch to altsetting 0 */
-		if (intf->needs_altsetting0 && !intf->dev.power.in_suspend) {
+		if (intf->needs_altsetting0 && !intf->dev.power.is_prepared) {
 			usb_set_interface(udev, intf->altsetting[0].
 					desc.bInterfaceNumber, 0);
 			intf->needs_altsetting0 = 0;
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 2e29abb..095c36f 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -125,7 +125,7 @@
  * positive retcode - signal for ext4_ext_walk_space(), see below
  * callback must return valid extent (passed or newly created)
  */
-typedef int (*ext_prepare_callback)(struct inode *, struct ext4_ext_path *,
+typedef int (*ext_prepare_callback)(struct inode *, ext4_lblk_t,
 					struct ext4_ext_cache *,
 					struct ext4_extent *, void *);
 
@@ -133,8 +133,11 @@
 #define EXT_BREAK      1
 #define EXT_REPEAT     2
 
-/* Maximum logical block in a file; ext4_extent's ee_block is __le32 */
-#define EXT_MAX_BLOCK	0xffffffff
+/*
+ * Maximum number of logical blocks in a file; ext4_extent's ee_block is
+ * __le32.
+ */
+#define EXT_MAX_BLOCKS	0xffffffff
 
 /*
  * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 5199bac..f815cc8 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1408,7 +1408,7 @@
 
 /*
  * ext4_ext_next_allocated_block:
- * returns allocated block in subsequent extent or EXT_MAX_BLOCK.
+ * returns allocated block in subsequent extent or EXT_MAX_BLOCKS.
  * NOTE: it considers block number from index entry as
  * allocated block. Thus, index entries have to be consistent
  * with leaves.
@@ -1422,7 +1422,7 @@
 	depth = path->p_depth;
 
 	if (depth == 0 && path->p_ext == NULL)
-		return EXT_MAX_BLOCK;
+		return EXT_MAX_BLOCKS;
 
 	while (depth >= 0) {
 		if (depth == path->p_depth) {
@@ -1439,12 +1439,12 @@
 		depth--;
 	}
 
-	return EXT_MAX_BLOCK;
+	return EXT_MAX_BLOCKS;
 }
 
 /*
  * ext4_ext_next_leaf_block:
- * returns first allocated block from next leaf or EXT_MAX_BLOCK
+ * returns first allocated block from next leaf or EXT_MAX_BLOCKS
  */
 static ext4_lblk_t ext4_ext_next_leaf_block(struct inode *inode,
 					struct ext4_ext_path *path)
@@ -1456,7 +1456,7 @@
 
 	/* zero-tree has no leaf blocks at all */
 	if (depth == 0)
-		return EXT_MAX_BLOCK;
+		return EXT_MAX_BLOCKS;
 
 	/* go to index block */
 	depth--;
@@ -1469,7 +1469,7 @@
 		depth--;
 	}
 
-	return EXT_MAX_BLOCK;
+	return EXT_MAX_BLOCKS;
 }
 
 /*
@@ -1677,13 +1677,13 @@
 	 */
 	if (b2 < b1) {
 		b2 = ext4_ext_next_allocated_block(path);
-		if (b2 == EXT_MAX_BLOCK)
+		if (b2 == EXT_MAX_BLOCKS)
 			goto out;
 	}
 
 	/* check for wrap through zero on extent logical start block*/
 	if (b1 + len1 < b1) {
-		len1 = EXT_MAX_BLOCK - b1;
+		len1 = EXT_MAX_BLOCKS - b1;
 		newext->ee_len = cpu_to_le16(len1);
 		ret = 1;
 	}
@@ -1767,7 +1767,7 @@
 	fex = EXT_LAST_EXTENT(eh);
 	next = ext4_ext_next_leaf_block(inode, path);
 	if (le32_to_cpu(newext->ee_block) > le32_to_cpu(fex->ee_block)
-	    && next != EXT_MAX_BLOCK) {
+	    && next != EXT_MAX_BLOCKS) {
 		ext_debug("next leaf block - %d\n", next);
 		BUG_ON(npath != NULL);
 		npath = ext4_ext_find_extent(inode, next, NULL);
@@ -1887,7 +1887,7 @@
 	BUG_ON(func == NULL);
 	BUG_ON(inode == NULL);
 
-	while (block < last && block != EXT_MAX_BLOCK) {
+	while (block < last && block != EXT_MAX_BLOCKS) {
 		num = last - block;
 		/* find extent for this block */
 		down_read(&EXT4_I(inode)->i_data_sem);
@@ -1958,7 +1958,7 @@
 			err = -EIO;
 			break;
 		}
-		err = func(inode, path, &cbex, ex, cbdata);
+		err = func(inode, next, &cbex, ex, cbdata);
 		ext4_ext_drop_refs(path);
 
 		if (err < 0)
@@ -2020,7 +2020,7 @@
 	if (ex == NULL) {
 		/* there is no extent yet, so gap is [0;-] */
 		lblock = 0;
-		len = EXT_MAX_BLOCK;
+		len = EXT_MAX_BLOCKS;
 		ext_debug("cache gap(whole file):");
 	} else if (block < le32_to_cpu(ex->ee_block)) {
 		lblock = block;
@@ -2350,7 +2350,7 @@
 			 * never happen because at least one of the end points
 			 * needs to be on the edge of the extent.
 			 */
-			if (end == EXT_MAX_BLOCK) {
+			if (end == EXT_MAX_BLOCKS - 1) {
 				ext_debug("  bad truncate %u:%u\n",
 						start, end);
 				block = 0;
@@ -2398,7 +2398,7 @@
 			 * If this is a truncate, this condition
 			 * should never happen
 			 */
-			if (end == EXT_MAX_BLOCK) {
+			if (end == EXT_MAX_BLOCKS - 1) {
 				ext_debug("  bad truncate %u:%u\n",
 					start, end);
 				err = -EIO;
@@ -2478,7 +2478,7 @@
 		 * we need to remove it from the leaf
 		 */
 		if (num == 0) {
-			if (end != EXT_MAX_BLOCK) {
+			if (end != EXT_MAX_BLOCKS - 1) {
 				/*
 				 * For hole punching, we need to scoot all the
 				 * extents up when an extent is removed so that
@@ -3699,7 +3699,7 @@
 
 	last_block = (inode->i_size + sb->s_blocksize - 1)
 			>> EXT4_BLOCK_SIZE_BITS(sb);
-	err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCK);
+	err = ext4_ext_remove_space(inode, last_block, EXT_MAX_BLOCKS - 1);
 
 	/* In a multi-transaction truncate, we only make the final
 	 * transaction synchronous.
@@ -3914,14 +3914,13 @@
 /*
  * Callback function called for each extent to gather FIEMAP information.
  */
-static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
+static int ext4_ext_fiemap_cb(struct inode *inode, ext4_lblk_t next,
 		       struct ext4_ext_cache *newex, struct ext4_extent *ex,
 		       void *data)
 {
 	__u64	logical;
 	__u64	physical;
 	__u64	length;
-	loff_t	size;
 	__u32	flags = 0;
 	int		ret = 0;
 	struct fiemap_extent_info *fieinfo = data;
@@ -4103,8 +4102,7 @@
 	if (ex && ext4_ext_is_uninitialized(ex))
 		flags |= FIEMAP_EXTENT_UNWRITTEN;
 
-	size = i_size_read(inode);
-	if (logical + length >= size)
+	if (next == EXT_MAX_BLOCKS)
 		flags |= FIEMAP_EXTENT_LAST;
 
 	ret = fiemap_fill_next_extent(fieinfo, logical, physical,
@@ -4347,8 +4345,8 @@
 
 		start_blk = start >> inode->i_sb->s_blocksize_bits;
 		last_blk = (start + len - 1) >> inode->i_sb->s_blocksize_bits;
-		if (last_blk >= EXT_MAX_BLOCK)
-			last_blk = EXT_MAX_BLOCK-1;
+		if (last_blk >= EXT_MAX_BLOCKS)
+			last_blk = EXT_MAX_BLOCKS-1;
 		len_blks = ((ext4_lblk_t) last_blk) - start_blk + 1;
 
 		/*
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index a5763e3..e3126c0 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -2634,7 +2634,7 @@
 	struct buffer_head *page_bufs = NULL;
 	struct inode *inode = page->mapping->host;
 
-	trace_ext4_writepage(inode, page);
+	trace_ext4_writepage(page);
 	size = i_size_read(inode);
 	if (page->index == size >> PAGE_CACHE_SHIFT)
 		len = size & ~PAGE_CACHE_MASK;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 859f2ae..6ed859d 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3578,8 +3578,8 @@
 		free += next - bit;
 
 		trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
-		trace_ext4_mb_release_inode_pa(sb, pa->pa_inode, pa,
-					       grp_blk_start + bit, next - bit);
+		trace_ext4_mb_release_inode_pa(pa, grp_blk_start + bit,
+					       next - bit);
 		mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
 		bit = next + 1;
 	}
@@ -3608,7 +3608,7 @@
 	ext4_group_t group;
 	ext4_grpblk_t bit;
 
-	trace_ext4_mb_release_group_pa(sb, pa);
+	trace_ext4_mb_release_group_pa(pa);
 	BUG_ON(pa->pa_deleted == 0);
 	ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
 	BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
@@ -4448,7 +4448,7 @@
  * @inode:		inode
  * @block:		start physical block to free
  * @count:		number of blocks to count
- * @metadata: 		Are these metadata blocks
+ * @flags:		flags used by ext4_free_blocks
  */
 void ext4_free_blocks(handle_t *handle, struct inode *inode,
 		      struct buffer_head *bh, ext4_fsblk_t block,
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 2b8304b..f57455a 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -1002,12 +1002,12 @@
 		return -EINVAL;
 	}
 
-	if ((orig_start > EXT_MAX_BLOCK) ||
-	    (donor_start > EXT_MAX_BLOCK) ||
-	    (*len > EXT_MAX_BLOCK) ||
-	    (orig_start + *len > EXT_MAX_BLOCK))  {
+	if ((orig_start >= EXT_MAX_BLOCKS) ||
+	    (donor_start >= EXT_MAX_BLOCKS) ||
+	    (*len > EXT_MAX_BLOCKS) ||
+	    (orig_start + *len >= EXT_MAX_BLOCKS))  {
 		ext4_debug("ext4 move extent: Can't handle over [%u] blocks "
-			"[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCK,
+			"[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCKS,
 			orig_inode->i_ino, donor_inode->i_ino);
 		return -EINVAL;
 	}
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index cc5c157..9ea71aa 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -2243,6 +2243,12 @@
  * in the vfs.  ext4 inode has 48 bits of i_block in fsblock units,
  * so that won't be a limiting factor.
  *
+ * However, there is another limiting factor: we store extents in the form
+ * of a starting block and a length, so the length of the extent covering
+ * the maximum file size must also fit into the on-disk format containers.
+ * Since a length of N covers blocks 0 through N - 1, that length is one
+ * unit bigger than the highest block number, so we have to lower
+ * s_maxbytes by one fs block.
+ *
  * Note, this does *not* consider any metadata overhead for vfs i_blocks.
  */
 static loff_t ext4_max_size(int blkbits, int has_huge_files)
@@ -2264,10 +2270,13 @@
 		upper_limit <<= blkbits;
 	}
 
-	/* 32-bit extent-start container, ee_block */
-	res = 1LL << 32;
+	/*
+	 * 32-bit extent-start container, ee_block. We lower the maxbytes
+	 * by one fs block, so ee_len can cover the extent of maximum file
+	 * size
+	 */
+	res = (1LL << 32) - 1;
 	res <<= blkbits;
-	res -= 1;
 
 	/* Sanity check against vm- & vfs- imposed limits */
 	if (res > upper_limit)
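
For example, with 4 KiB blocks (blkbits = 12) the old limit was
(1LL << 44) - 1 bytes, whereas the new one is
((1LL << 32) - 1) << 12 = (1LL << 44) - 4096 bytes: exactly one filesystem
block lower, so the length of an extent covering the largest possible file
still fits in 32 bits.
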
diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c
index 6a79fd0..2c62c5a 100644
--- a/fs/jbd2/checkpoint.c
+++ b/fs/jbd2/checkpoint.c
@@ -97,10 +97,14 @@
 
 	if (jh->b_jlist == BJ_None && !buffer_locked(bh) &&
 	    !buffer_dirty(bh) && !buffer_write_io_error(bh)) {
+		/*
+		 * Get our reference so that bh cannot be freed before
+		 * we unlock it
+		 */
+		get_bh(bh);
 		JBUFFER_TRACE(jh, "remove from checkpoint list");
 		ret = __jbd2_journal_remove_checkpoint(jh) + 1;
 		jbd_unlock_bh_state(bh);
-		jbd2_journal_remove_journal_head(bh);
 		BUFFER_TRACE(bh, "release");
 		__brelse(bh);
 	} else {
@@ -223,8 +227,8 @@
 			spin_lock(&journal->j_list_lock);
 			goto restart;
 		}
+		get_bh(bh);
 		if (buffer_locked(bh)) {
-			atomic_inc(&bh->b_count);
 			spin_unlock(&journal->j_list_lock);
 			jbd_unlock_bh_state(bh);
 			wait_on_buffer(bh);
@@ -243,7 +247,6 @@
 		 */
 		released = __jbd2_journal_remove_checkpoint(jh);
 		jbd_unlock_bh_state(bh);
-		jbd2_journal_remove_journal_head(bh);
 		__brelse(bh);
 	}
 
@@ -284,7 +287,7 @@
 	int ret = 0;
 
 	if (buffer_locked(bh)) {
-		atomic_inc(&bh->b_count);
+		get_bh(bh);
 		spin_unlock(&journal->j_list_lock);
 		jbd_unlock_bh_state(bh);
 		wait_on_buffer(bh);
@@ -316,12 +319,12 @@
 		ret = 1;
 		if (unlikely(buffer_write_io_error(bh)))
 			ret = -EIO;
+		get_bh(bh);
 		J_ASSERT_JH(jh, !buffer_jbddirty(bh));
 		BUFFER_TRACE(bh, "remove from checkpoint");
 		__jbd2_journal_remove_checkpoint(jh);
 		spin_unlock(&journal->j_list_lock);
 		jbd_unlock_bh_state(bh);
-		jbd2_journal_remove_journal_head(bh);
 		__brelse(bh);
 	} else {
 		/*
@@ -554,7 +557,8 @@
 /*
  * journal_clean_one_cp_list
  *
- * Find all the written-back checkpoint buffers in the given list and release them.
+ * Find all the written-back checkpoint buffers in the given list and
+ * release them.
  *
  * Called with the journal locked.
  * Called with j_list_lock held.
@@ -663,8 +667,8 @@
  * checkpoint lists.
  *
  * The function returns 1 if it frees the transaction, 0 otherwise.
+ * The function can free jh and bh.
  *
- * This function is called with the journal locked.
  * This function is called with j_list_lock held.
  * This function is called with jbd_lock_bh_state(jh2bh(jh))
  */
@@ -684,13 +688,14 @@
 	}
 	journal = transaction->t_journal;
 
+	JBUFFER_TRACE(jh, "removing from transaction");
 	__buffer_unlink(jh);
 	jh->b_cp_transaction = NULL;
+	jbd2_journal_put_journal_head(jh);
 
 	if (transaction->t_checkpoint_list != NULL ||
 	    transaction->t_checkpoint_io_list != NULL)
 		goto out;
-	JBUFFER_TRACE(jh, "transaction has no more buffers");
 
 	/*
 	 * There is one special case to worry about: if we have just pulled the
@@ -701,10 +706,8 @@
 	 * The locking here around t_state is a bit sleazy.
 	 * See the comment at the end of jbd2_journal_commit_transaction().
 	 */
-	if (transaction->t_state != T_FINISHED) {
-		JBUFFER_TRACE(jh, "belongs to running/committing transaction");
+	if (transaction->t_state != T_FINISHED)
 		goto out;
-	}
 
 	/* OK, that was the last buffer for the transaction: we can now
 	   safely remove this transaction from the log */
@@ -723,7 +726,6 @@
 	wake_up(&journal->j_wait_logspace);
 	ret = 1;
 out:
-	JBUFFER_TRACE(jh, "exit");
 	return ret;
 }
 
@@ -742,6 +744,8 @@
 	J_ASSERT_JH(jh, buffer_dirty(jh2bh(jh)) || buffer_jbddirty(jh2bh(jh)));
 	J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
 
+	/* Get reference for checkpointing transaction */
+	jbd2_journal_grab_journal_head(jh2bh(jh));
 	jh->b_cp_transaction = transaction;
 
 	if (!transaction->t_checkpoint_list) {
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 7f21cf3..eef6979 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -848,10 +848,16 @@
 	while (commit_transaction->t_forget) {
 		transaction_t *cp_transaction;
 		struct buffer_head *bh;
+		int try_to_free = 0;
 
 		jh = commit_transaction->t_forget;
 		spin_unlock(&journal->j_list_lock);
 		bh = jh2bh(jh);
+		/*
+		 * Get a reference so that bh cannot be freed before we are
+		 * done with it.
+		 */
+		get_bh(bh);
 		jbd_lock_bh_state(bh);
 		J_ASSERT_JH(jh,	jh->b_transaction == commit_transaction);
 
@@ -914,28 +920,27 @@
 			__jbd2_journal_insert_checkpoint(jh, commit_transaction);
 			if (is_journal_aborted(journal))
 				clear_buffer_jbddirty(bh);
-			JBUFFER_TRACE(jh, "refile for checkpoint writeback");
-			__jbd2_journal_refile_buffer(jh);
-			jbd_unlock_bh_state(bh);
 		} else {
 			J_ASSERT_BH(bh, !buffer_dirty(bh));
-			/* The buffer on BJ_Forget list and not jbddirty means
+			/*
+			 * The buffer being on the BJ_Forget list and not jbddirty means
 			 * it has been freed by this transaction and hence it
 			 * could not have been reallocated until this
 			 * transaction has committed. *BUT* it could be
 			 * reallocated once we have written all the data to
 			 * disk and before we process the buffer on BJ_Forget
-			 * list. */
-			JBUFFER_TRACE(jh, "refile or unfile freed buffer");
-			__jbd2_journal_refile_buffer(jh);
-			if (!jh->b_transaction) {
-				jbd_unlock_bh_state(bh);
-				 /* needs a brelse */
-				jbd2_journal_remove_journal_head(bh);
-				release_buffer_page(bh);
-			} else
-				jbd_unlock_bh_state(bh);
+			 * list.
+			 */
+			if (!jh->b_next_transaction)
+				try_to_free = 1;
 		}
+		JBUFFER_TRACE(jh, "refile or unfile buffer");
+		__jbd2_journal_refile_buffer(jh);
+		jbd_unlock_bh_state(bh);
+		if (try_to_free)
+			release_buffer_page(bh);	/* Drops bh reference */
+		else
+			__brelse(bh);
 		cond_resched_lock(&journal->j_list_lock);
 	}
 	spin_unlock(&journal->j_list_lock);
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 9a78269..0dfa5b59 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -2078,10 +2078,9 @@
  * When a buffer has its BH_JBD bit set it is immune from being released by
  * core kernel code, mainly via ->b_count.
  *
- * A journal_head may be detached from its buffer_head when the journal_head's
- * b_transaction, b_cp_transaction and b_next_transaction pointers are NULL.
- * Various places in JBD call jbd2_journal_remove_journal_head() to indicate that the
- * journal_head can be dropped if needed.
+ * A journal_head is detached from its buffer_head when the journal_head's
+ * b_jcount reaches zero.  The running transaction (b_transaction) and the
+ * checkpoint transaction (b_cp_transaction) each hold a b_jcount reference.
  *
  * Various places in the kernel want to attach a journal_head to a buffer_head
  * _before_ attaching the journal_head to a transaction.  To protect the
@@ -2094,17 +2093,16 @@
  *	(Attach a journal_head if needed.  Increments b_jcount)
  *	struct journal_head *jh = jbd2_journal_add_journal_head(bh);
  *	...
+ *      (Get another reference for transaction)
+ *	jbd2_journal_grab_journal_head(bh);
  *	jh->b_transaction = xxx;
+ *	(Put original reference)
  *	jbd2_journal_put_journal_head(jh);
- *
- * Now, the journal_head's b_jcount is zero, but it is safe from being released
- * because it has a non-zero b_transaction.
  */
 
 /*
  * Give a buffer_head a journal_head.
  *
- * Doesn't need the journal lock.
  * May sleep.
  */
 struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh)
@@ -2168,61 +2166,29 @@
 	struct journal_head *jh = bh2jh(bh);
 
 	J_ASSERT_JH(jh, jh->b_jcount >= 0);
-
-	get_bh(bh);
-	if (jh->b_jcount == 0) {
-		if (jh->b_transaction == NULL &&
-				jh->b_next_transaction == NULL &&
-				jh->b_cp_transaction == NULL) {
-			J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
-			J_ASSERT_BH(bh, buffer_jbd(bh));
-			J_ASSERT_BH(bh, jh2bh(jh) == bh);
-			BUFFER_TRACE(bh, "remove journal_head");
-			if (jh->b_frozen_data) {
-				printk(KERN_WARNING "%s: freeing "
-						"b_frozen_data\n",
-						__func__);
-				jbd2_free(jh->b_frozen_data, bh->b_size);
-			}
-			if (jh->b_committed_data) {
-				printk(KERN_WARNING "%s: freeing "
-						"b_committed_data\n",
-						__func__);
-				jbd2_free(jh->b_committed_data, bh->b_size);
-			}
-			bh->b_private = NULL;
-			jh->b_bh = NULL;	/* debug, really */
-			clear_buffer_jbd(bh);
-			__brelse(bh);
-			journal_free_journal_head(jh);
-		} else {
-			BUFFER_TRACE(bh, "journal_head was locked");
-		}
+	J_ASSERT_JH(jh, jh->b_transaction == NULL);
+	J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
+	J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
+	J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
+	J_ASSERT_BH(bh, buffer_jbd(bh));
+	J_ASSERT_BH(bh, jh2bh(jh) == bh);
+	BUFFER_TRACE(bh, "remove journal_head");
+	if (jh->b_frozen_data) {
+		printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__);
+		jbd2_free(jh->b_frozen_data, bh->b_size);
 	}
+	if (jh->b_committed_data) {
+		printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__);
+		jbd2_free(jh->b_committed_data, bh->b_size);
+	}
+	bh->b_private = NULL;
+	jh->b_bh = NULL;	/* debug, really */
+	clear_buffer_jbd(bh);
+	journal_free_journal_head(jh);
 }
 
 /*
- * jbd2_journal_remove_journal_head(): if the buffer isn't attached to a transaction
- * and has a zero b_jcount then remove and release its journal_head.   If we did
- * see that the buffer is not used by any transaction we also "logically"
- * decrement ->b_count.
- *
- * We in fact take an additional increment on ->b_count as a convenience,
- * because the caller usually wants to do additional things with the bh
- * after calling here.
- * The caller of jbd2_journal_remove_journal_head() *must* run __brelse(bh) at some
- * time.  Once the caller has run __brelse(), the buffer is eligible for
- * reaping by try_to_free_buffers().
- */
-void jbd2_journal_remove_journal_head(struct buffer_head *bh)
-{
-	jbd_lock_bh_journal_head(bh);
-	__journal_remove_journal_head(bh);
-	jbd_unlock_bh_journal_head(bh);
-}
-
-/*
- * Drop a reference on the passed journal_head.  If it fell to zero then try to
+ * Drop a reference on the passed journal_head.  If it fell to zero then
  * release the journal_head from the buffer_head.
  */
 void jbd2_journal_put_journal_head(struct journal_head *jh)
@@ -2232,11 +2198,12 @@
 	jbd_lock_bh_journal_head(bh);
 	J_ASSERT_JH(jh, jh->b_jcount > 0);
 	--jh->b_jcount;
-	if (!jh->b_jcount && !jh->b_transaction) {
+	if (!jh->b_jcount) {
 		__journal_remove_journal_head(bh);
+		jbd_unlock_bh_journal_head(bh);
 		__brelse(bh);
-	}
-	jbd_unlock_bh_journal_head(bh);
+	} else
+		jbd_unlock_bh_journal_head(bh);
 }
 
 /*
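
Under the new scheme the checkpoint list follows the same convention as the
running transaction: attaching takes its own b_jcount reference and
detaching drops it, mirroring the fs/jbd2/checkpoint.c hunks above (a
condensed sketch of the pairing):

	/* __jbd2_journal_insert_checkpoint(): take a reference ... */
	jbd2_journal_grab_journal_head(jh2bh(jh));
	jh->b_cp_transaction = transaction;

	/* ... __jbd2_journal_remove_checkpoint(): and drop it again */
	jh->b_cp_transaction = NULL;
	jbd2_journal_put_journal_head(jh);
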
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 3eec82d..2d71094 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -30,6 +30,7 @@
 #include <linux/module.h>
 
 static void __jbd2_journal_temp_unlink_buffer(struct journal_head *jh);
+static void __jbd2_journal_unfile_buffer(struct journal_head *jh);
 
 /*
  * jbd2_get_transaction: obtain a new transaction_t object.
@@ -764,7 +765,6 @@
 	if (!jh->b_transaction) {
 		JBUFFER_TRACE(jh, "no transaction");
 		J_ASSERT_JH(jh, !jh->b_next_transaction);
-		jh->b_transaction = transaction;
 		JBUFFER_TRACE(jh, "file as BJ_Reserved");
 		spin_lock(&journal->j_list_lock);
 		__jbd2_journal_file_buffer(jh, transaction, BJ_Reserved);
@@ -814,7 +814,6 @@
  * int jbd2_journal_get_write_access() - notify intent to modify a buffer for metadata (not data) update.
  * @handle: transaction to add buffer modifications to
  * @bh:     bh to be used for metadata writes
- * @credits: variable that will receive credits for the buffer
  *
  * Returns an error code or 0 on success.
  *
@@ -896,8 +895,6 @@
 		 * committed and so it's safe to clear the dirty bit.
 		 */
 		clear_buffer_dirty(jh2bh(jh));
-		jh->b_transaction = transaction;
-
 		/* first access by this transaction */
 		jh->b_modified = 0;
 
@@ -932,7 +929,6 @@
  *     non-rewindable consequences
  * @handle: transaction
  * @bh: buffer to undo
- * @credits: store the number of taken credits here (if not NULL)
  *
  * Sometimes there is a need to distinguish between metadata which has
  * been committed to disk and that which has not.  The ext3fs code uses
@@ -1232,8 +1228,6 @@
 			__jbd2_journal_file_buffer(jh, transaction, BJ_Forget);
 		} else {
 			__jbd2_journal_unfile_buffer(jh);
-			jbd2_journal_remove_journal_head(bh);
-			__brelse(bh);
 			if (!buffer_jbd(bh)) {
 				spin_unlock(&journal->j_list_lock);
 				jbd_unlock_bh_state(bh);
@@ -1556,19 +1550,32 @@
 		mark_buffer_dirty(bh);	/* Expose it to the VM */
 }
 
-void __jbd2_journal_unfile_buffer(struct journal_head *jh)
+/*
+ * Remove buffer from all transactions.
+ *
+ * Called with bh_state lock and j_list_lock
+ *
+ * jh and bh may be already freed when this function returns.
+ */
+static void __jbd2_journal_unfile_buffer(struct journal_head *jh)
 {
 	__jbd2_journal_temp_unlink_buffer(jh);
 	jh->b_transaction = NULL;
+	jbd2_journal_put_journal_head(jh);
 }
 
 void jbd2_journal_unfile_buffer(journal_t *journal, struct journal_head *jh)
 {
-	jbd_lock_bh_state(jh2bh(jh));
+	struct buffer_head *bh = jh2bh(jh);
+
+	/* Get reference so that buffer cannot be freed before we unlock it */
+	get_bh(bh);
+	jbd_lock_bh_state(bh);
 	spin_lock(&journal->j_list_lock);
 	__jbd2_journal_unfile_buffer(jh);
 	spin_unlock(&journal->j_list_lock);
-	jbd_unlock_bh_state(jh2bh(jh));
+	jbd_unlock_bh_state(bh);
+	__brelse(bh);
 }
 
 /*
@@ -1595,8 +1602,6 @@
 		if (jh->b_jlist == BJ_None) {
 			JBUFFER_TRACE(jh, "remove from checkpoint list");
 			__jbd2_journal_remove_checkpoint(jh);
-			jbd2_journal_remove_journal_head(bh);
-			__brelse(bh);
 		}
 	}
 	spin_unlock(&journal->j_list_lock);
@@ -1659,7 +1664,6 @@
 		/*
 		 * We take our own ref against the journal_head here to avoid
 		 * having to add tons of locking around each instance of
-		 * jbd2_journal_remove_journal_head() and
 		 * jbd2_journal_put_journal_head().
 		 */
 		jh = jbd2_journal_grab_journal_head(bh);
@@ -1697,10 +1701,9 @@
 	int may_free = 1;
 	struct buffer_head *bh = jh2bh(jh);
 
-	__jbd2_journal_unfile_buffer(jh);
-
 	if (jh->b_cp_transaction) {
 		JBUFFER_TRACE(jh, "on running+cp transaction");
+		__jbd2_journal_temp_unlink_buffer(jh);
 		/*
 		 * We don't want to write the buffer anymore, clear the
 		 * bit so that we don't confuse checks in
@@ -1711,8 +1714,7 @@
 		may_free = 0;
 	} else {
 		JBUFFER_TRACE(jh, "on running transaction");
-		jbd2_journal_remove_journal_head(bh);
-		__brelse(bh);
+		__jbd2_journal_unfile_buffer(jh);
 	}
 	return may_free;
 }
@@ -1990,6 +1992,8 @@
 
 	if (jh->b_transaction)
 		__jbd2_journal_temp_unlink_buffer(jh);
+	else
+		jbd2_journal_grab_journal_head(bh);
 	jh->b_transaction = transaction;
 
 	switch (jlist) {
@@ -2041,9 +2045,10 @@
  * already started to be used by a subsequent transaction, refile the
  * buffer on that transaction's metadata list.
  *
- * Called under journal->j_list_lock
- *
+ * Called under j_list_lock
  * Called under jbd_lock_bh_state(jh2bh(jh))
+ *
+ * jh and bh may be already freed when this function returns.
  */
 void __jbd2_journal_refile_buffer(struct journal_head *jh)
 {
@@ -2067,6 +2072,11 @@
 
 	was_dirty = test_clear_buffer_jbddirty(bh);
 	__jbd2_journal_temp_unlink_buffer(jh);
+	/*
+	 * We set b_transaction here because b_next_transaction will inherit
+	 * our jh reference and thus __jbd2_journal_file_buffer() must not
+	 * take a new one.
+	 */
 	jh->b_transaction = jh->b_next_transaction;
 	jh->b_next_transaction = NULL;
 	if (buffer_freed(bh))
@@ -2083,30 +2093,21 @@
 }
 
 /*
- * For the unlocked version of this call, also make sure that any
- * hanging journal_head is cleaned up if necessary.
+ * __jbd2_journal_refile_buffer() with necessary locking added. We take our
+ * bh reference so that we can safely unlock bh.
  *
- * __jbd2_journal_refile_buffer is usually called as part of a single locked
- * operation on a buffer_head, in which the caller is probably going to
- * be hooking the journal_head onto other lists.  In that case it is up
- * to the caller to remove the journal_head if necessary.  For the
- * unlocked jbd2_journal_refile_buffer call, the caller isn't going to be
- * doing anything else to the buffer so we need to do the cleanup
- * ourselves to avoid a jh leak.
- *
- * *** The journal_head may be freed by this call! ***
+ * The jh and bh may be freed by this call.
  */
 void jbd2_journal_refile_buffer(journal_t *journal, struct journal_head *jh)
 {
 	struct buffer_head *bh = jh2bh(jh);
 
+	/* Get reference so that buffer cannot be freed before we unlock it */
+	get_bh(bh);
 	jbd_lock_bh_state(bh);
 	spin_lock(&journal->j_list_lock);
-
 	__jbd2_journal_refile_buffer(jh);
 	jbd_unlock_bh_state(bh);
-	jbd2_journal_remove_journal_head(bh);
-
 	spin_unlock(&journal->j_list_lock);
 	__brelse(bh);
 }
diff --git a/fs/jfs/file.c b/fs/jfs/file.c
index c5ce6c1..2f3f531 100644
--- a/fs/jfs/file.c
+++ b/fs/jfs/file.c
@@ -66,9 +66,9 @@
 		struct jfs_inode_info *ji = JFS_IP(inode);
 		spin_lock_irq(&ji->ag_lock);
 		if (ji->active_ag == -1) {
-			ji->active_ag = ji->agno;
-			atomic_inc(
-			    &JFS_SBI(inode->i_sb)->bmap->db_active[ji->agno]);
+			struct jfs_sb_info *jfs_sb = JFS_SBI(inode->i_sb);
+			ji->active_ag = BLKTOAG(addressPXD(&ji->ixpxd), jfs_sb);
+			atomic_inc(&jfs_sb->bmap->db_active[ji->active_ag]);
 		}
 		spin_unlock_irq(&ji->ag_lock);
 	}
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c
index ed53a47..b78b2f9 100644
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -397,7 +397,7 @@
 	release_metapage(mp);
 
 	/* set the ag for the inode */
-	JFS_IP(ip)->agno = BLKTOAG(agstart, sbi);
+	JFS_IP(ip)->agstart = agstart;
 	JFS_IP(ip)->active_ag = -1;
 
 	return (rc);
@@ -901,7 +901,7 @@
 
 	/* get the allocation group for this ino.
 	 */
-	agno = JFS_IP(ip)->agno;
+	agno = BLKTOAG(JFS_IP(ip)->agstart, JFS_SBI(ip->i_sb));
 
 	/* Lock the AG specific inode map information
 	 */
@@ -1315,12 +1315,11 @@
 static inline void
 diInitInode(struct inode *ip, int iagno, int ino, int extno, struct iag * iagp)
 {
-	struct jfs_sb_info *sbi = JFS_SBI(ip->i_sb);
 	struct jfs_inode_info *jfs_ip = JFS_IP(ip);
 
 	ip->i_ino = (iagno << L2INOSPERIAG) + ino;
 	jfs_ip->ixpxd = iagp->inoext[extno];
-	jfs_ip->agno = BLKTOAG(le64_to_cpu(iagp->agstart), sbi);
+	jfs_ip->agstart = le64_to_cpu(iagp->agstart);
 	jfs_ip->active_ag = -1;
 }
 
@@ -1379,7 +1378,7 @@
 	 */
 
 	/* get the ag number of this iag */
-	agno = JFS_IP(pip)->agno;
+	agno = BLKTOAG(JFS_IP(pip)->agstart, JFS_SBI(pip->i_sb));
 
 	if (atomic_read(&JFS_SBI(pip->i_sb)->bmap->db_active[agno])) {
 		/*
@@ -2921,10 +2920,9 @@
 			continue;
 		}
 
-		/* agstart that computes to the same ag is treated as same; */
 		agstart = le64_to_cpu(iagp->agstart);
-		/* iagp->agstart = agstart & ~(mp->db_agsize - 1); */
 		n = agstart >> mp->db_agl2size;
+		iagp->agstart = cpu_to_le64((s64)n << mp->db_agl2size);
 
 		/* compute backed inodes */
 		numinos = (EXTSPERIAG - le32_to_cpu(iagp->nfreeexts))
diff --git a/fs/jfs/jfs_incore.h b/fs/jfs/jfs_incore.h
index 1439f11..584a4a1 100644
--- a/fs/jfs/jfs_incore.h
+++ b/fs/jfs/jfs_incore.h
@@ -50,8 +50,9 @@
 	short	btindex;	/* btpage entry index*/
 	struct inode *ipimap;	/* inode map			*/
 	unsigned long cflag;	/* commit flags		*/
+	u64	agstart;	/* agstart of the containing IAG */
 	u16	bxflag;		/* xflag of pseudo buffer?	*/
-	unchar	agno;		/* ag number			*/
+	unchar	pad;
 	signed char active_ag;	/* ag currently allocating from	*/
 	lid_t	blid;		/* lid of pseudo buffer?	*/
 	lid_t	atlhead;	/* anonymous tlock list head	*/
diff --git a/fs/jfs/resize.c b/fs/jfs/resize.c
index 8ea5efb..8d0c1c7 100644
--- a/fs/jfs/resize.c
+++ b/fs/jfs/resize.c
@@ -80,7 +80,7 @@
 	int log_formatted = 0;
 	struct inode *iplist[1];
 	struct jfs_superblock *j_sb, *j_sb2;
-	uint old_agsize;
+	s64 old_agsize;
 	int agsizechanged = 0;
 	struct buffer_head *bh, *bh2;
 
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index adb45ec..e374050 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -708,7 +708,13 @@
 
 	if (task->tk_status < 0) {
 		dprintk("lockd: unlock failed (err = %d)\n", -task->tk_status);
-		goto retry_rebind;
+		switch (task->tk_status) {
+		case -EACCES:
+		case -EIO:
+			goto die;
+		default:
+			goto retry_rebind;
+		}
 	}
 	if (status == NLM_LCK_DENIED_GRACE_PERIOD) {
 		rpc_delay(task, NLMCLNT_GRACE_WAIT);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 144f2a3..6f4850d 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -256,7 +256,8 @@
 
 	nfs_attr_check_mountpoint(sb, fattr);
 
-	if ((fattr->valid & NFS_ATTR_FATTR_FILEID) == 0 && (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT) == 0)
+	if (((fattr->valid & NFS_ATTR_FATTR_FILEID) == 0) &&
+	    !nfs_attr_use_mounted_on_fileid(fattr))
 		goto out_no_inode;
 	if ((fattr->valid & NFS_ATTR_FATTR_TYPE) == 0)
 		goto out_no_inode;
@@ -1294,7 +1295,8 @@
 		if (new_isize != cur_isize) {
 			/* Do we perhaps have any outstanding writes, or has
 			 * the file grown beyond our last write? */
-			if (nfsi->npages == 0 || new_isize > cur_isize) {
+			if ((nfsi->npages == 0 && !test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) ||
+			     new_isize > cur_isize) {
 				i_size_write(inode, new_isize);
 				invalid |= NFS_INO_INVALID_ATTR|NFS_INO_INVALID_DATA;
 			}
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index b9056cb..2a55347 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -45,6 +45,17 @@
 		fattr->valid |= NFS_ATTR_FATTR_MOUNTPOINT;
 }
 
+static inline int nfs_attr_use_mounted_on_fileid(struct nfs_fattr *fattr)
+{
+	if (((fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) == 0) ||
+	    (((fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT) == 0) &&
+	     ((fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) == 0)))
+		return 0;
+
+	fattr->fileid = fattr->mounted_on_fileid;
+	return 1;
+}
+
 struct nfs_clone_mount {
 	const struct super_block *sb;
 	const struct dentry *dentry;
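
In effect, nfs_attr_use_mounted_on_fileid() lets the caller fall back to
mounted_on_fileid, but only for mountpoints and referrals, where the real
fileid belongs to the absent filesystem; when it does so it copies
mounted_on_fileid into fattr->fileid so nfs_fhget() can proceed as usual.
For ordinary inodes a missing fileid still fails as before.
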
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 4269088..0bafcc9 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -30,6 +30,7 @@
  */
 
 #include <linux/nfs_fs.h>
+#include <linux/nfs_page.h>
 
 #include "internal.h"
 #include "nfs4filelayout.h"
@@ -552,13 +553,18 @@
 		__func__, nfl_util, fl->num_fh, fl->first_stripe_index,
 		fl->pattern_offset);
 
-	if (!fl->num_fh)
+	/* Note that a zero value for num_fh is legal for STRIPE_SPARSE.
+	 * Further checking is done in filelayout_check_layout */
+	if (fl->num_fh < 0 || fl->num_fh >
+	    max(NFS4_PNFS_MAX_STRIPE_CNT, NFS4_PNFS_MAX_MULTI_CNT))
 		goto out_err;
 
-	fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *),
-			       gfp_flags);
-	if (!fl->fh_array)
-		goto out_err;
+	if (fl->num_fh > 0) {
+		fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *),
+				       gfp_flags);
+		if (!fl->fh_array)
+			goto out_err;
+	}
 
 	for (i = 0; i < fl->num_fh; i++) {
 		/* Do we want to use a mempool here? */
@@ -661,8 +667,9 @@
 	u64 p_stripe, r_stripe;
 	u32 stripe_unit;
 
-	if (!pnfs_generic_pg_test(pgio, prev, req))
-		return 0;
+	if (!pnfs_generic_pg_test(pgio, prev, req) ||
+	    !nfs_generic_pg_test(pgio, prev, req))
+		return false;
 
 	if (!pgio->pg_lseg)
 		return 1;
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index d2c4b59..5879b23 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2265,12 +2265,14 @@
 	return nfs4_map_errors(status);
 }
 
+static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr);
 /*
  * Get locations and (maybe) other attributes of a referral.
  * Note that we'll actually follow the referral later when
  * we detect fsid mismatch in inode revalidation
  */
-static int nfs4_get_referral(struct inode *dir, const struct qstr *name, struct nfs_fattr *fattr, struct nfs_fh *fhandle)
+static int nfs4_get_referral(struct inode *dir, const struct qstr *name,
+			     struct nfs_fattr *fattr, struct nfs_fh *fhandle)
 {
 	int status = -ENOMEM;
 	struct page *page = NULL;
@@ -2288,15 +2290,16 @@
 		goto out;
 	/* Make sure server returned a different fsid for the referral */
 	if (nfs_fsid_equal(&NFS_SERVER(dir)->fsid, &locations->fattr.fsid)) {
-		dprintk("%s: server did not return a different fsid for a referral at %s\n", __func__, name->name);
+		dprintk("%s: server did not return a different fsid for"
+			" a referral at %s\n", __func__, name->name);
 		status = -EIO;
 		goto out;
 	}
+	/* Fixup attributes for the nfs_lookup() call to nfs_fhget() */
+	nfs_fixup_referral_attributes(&locations->fattr);
 
+	/* replace the lookup nfs_fattr with the locations nfs_fattr */
 	memcpy(fattr, &locations->fattr, sizeof(struct nfs_fattr));
-	fattr->valid |= NFS_ATTR_FATTR_V4_REFERRAL;
-	if (!fattr->mode)
-		fattr->mode = S_IFDIR;
 	memset(fhandle, 0, sizeof(struct nfs_fh));
 out:
 	if (page)
@@ -4667,11 +4670,15 @@
 	return len;
 }
 
+/*
+ * nfs_fhget will use either the mounted_on_fileid or the fileid
+ */
 static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr)
 {
-	if (!((fattr->valid & NFS_ATTR_FATTR_FILEID) &&
-		(fattr->valid & NFS_ATTR_FATTR_FSID) &&
-		(fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)))
+	if (!(((fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID) ||
+	       (fattr->valid & NFS_ATTR_FATTR_FILEID)) &&
+	      (fattr->valid & NFS_ATTR_FATTR_FSID) &&
+	      (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)))
 		return;
 
 	fattr->valid |= NFS_ATTR_FATTR_TYPE | NFS_ATTR_FATTR_MODE |
@@ -4686,7 +4693,6 @@
 	struct nfs_server *server = NFS_SERVER(dir);
 	u32 bitmask[2] = {
 		[0] = FATTR4_WORD0_FSID | FATTR4_WORD0_FS_LOCATIONS,
-		[1] = FATTR4_WORD1_MOUNTED_ON_FILEID,
 	};
 	struct nfs4_fs_locations_arg args = {
 		.dir_fh = NFS_FH(dir),
@@ -4705,11 +4711,18 @@
 	int status;
 
 	dprintk("%s: start\n", __func__);
+
+	/* Ask for the fileid of the absent filesystem if mounted_on_fileid
+	 * is not supported */
+	if (NFS_SERVER(dir)->attr_bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)
+		bitmask[1] |= FATTR4_WORD1_MOUNTED_ON_FILEID;
+	else
+		bitmask[0] |= FATTR4_WORD0_FILEID;
+
 	nfs_fattr_init(&fs_locations->fattr);
 	fs_locations->server = server;
 	fs_locations->nlocations = 0;
 	status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
-	nfs_fixup_referral_attributes(&fs_locations->fattr);
 	dprintk("%s: returned status = %d\n", __func__, status);
 	return status;
 }
@@ -5098,7 +5111,6 @@
 	if (mxresp_sz == 0)
 		mxresp_sz = NFS_MAX_FILE_IO_SIZE;
 	/* Fore channel attributes */
-	args->fc_attrs.headerpadsz = 0;
 	args->fc_attrs.max_rqst_sz = mxrqst_sz;
 	args->fc_attrs.max_resp_sz = mxresp_sz;
 	args->fc_attrs.max_ops = NFS4_MAX_OPS;
@@ -5111,7 +5123,6 @@
 		args->fc_attrs.max_ops, args->fc_attrs.max_reqs);
 
 	/* Back channel attributes */
-	args->bc_attrs.headerpadsz = 0;
 	args->bc_attrs.max_rqst_sz = PAGE_SIZE;
 	args->bc_attrs.max_resp_sz = PAGE_SIZE;
 	args->bc_attrs.max_resp_sz_cached = 0;
@@ -5131,8 +5142,6 @@
 	struct nfs4_channel_attrs *sent = &args->fc_attrs;
 	struct nfs4_channel_attrs *rcvd = &session->fc_attrs;
 
-	if (rcvd->headerpadsz > sent->headerpadsz)
-		return -EINVAL;
 	if (rcvd->max_resp_sz > sent->max_resp_sz)
 		return -EINVAL;
 	/*
@@ -5697,6 +5706,7 @@
 {
 	struct nfs4_layoutreturn *lrp = calldata;
 	struct nfs_server *server;
+	struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout;
 
 	dprintk("--> %s\n", __func__);
 
@@ -5708,16 +5718,15 @@
 		nfs_restart_rpc(task, lrp->clp);
 		return;
 	}
+	spin_lock(&lo->plh_inode->i_lock);
 	if (task->tk_status == 0) {
-		struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout;
-
 		if (lrp->res.lrs_present) {
-			spin_lock(&lo->plh_inode->i_lock);
 			pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
-			spin_unlock(&lo->plh_inode->i_lock);
 		} else
 			BUG_ON(!list_empty(&lo->plh_segs));
 	}
+	lo->plh_block_lgets--;
+	spin_unlock(&lo->plh_inode->i_lock);
 	dprintk("<-- %s\n", __func__);
 }
 
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index d869a5e..6870bc6 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -255,7 +255,7 @@
 #define decode_fs_locations_maxsz \
 				(0)
 #define encode_secinfo_maxsz	(op_encode_hdr_maxsz + nfs4_name_maxsz)
-#define decode_secinfo_maxsz	(op_decode_hdr_maxsz + 4 + (NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)))
+#define decode_secinfo_maxsz	(op_decode_hdr_maxsz + 1 + ((NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)) / 4))
 
 #if defined(CONFIG_NFS_V4_1)
 #define NFS4_MAX_MACHINE_NAME_LEN (64)
@@ -1725,7 +1725,7 @@
 	*p++ = cpu_to_be32(args->flags);			/*flags */
 
 	/* Fore Channel */
-	*p++ = cpu_to_be32(args->fc_attrs.headerpadsz);	/* header padding size */
+	*p++ = cpu_to_be32(0);				/* header padding size */
 	*p++ = cpu_to_be32(args->fc_attrs.max_rqst_sz);	/* max req size */
 	*p++ = cpu_to_be32(args->fc_attrs.max_resp_sz);	/* max resp size */
 	*p++ = cpu_to_be32(max_resp_sz_cached);		/* Max resp sz cached */
@@ -1734,7 +1734,7 @@
 	*p++ = cpu_to_be32(0);				/* rdmachannel_attrs */
 
 	/* Back Channel */
-	*p++ = cpu_to_be32(args->fc_attrs.headerpadsz);	/* header padding size */
+	*p++ = cpu_to_be32(0);				/* header padding size */
 	*p++ = cpu_to_be32(args->bc_attrs.max_rqst_sz);	/* max req size */
 	*p++ = cpu_to_be32(args->bc_attrs.max_resp_sz);	/* max resp size */
 	*p++ = cpu_to_be32(args->bc_attrs.max_resp_sz_cached);	/* Max resp sz cached */
@@ -3098,7 +3098,7 @@
 	return -EIO;
 }
 
-static int decode_attr_error(struct xdr_stream *xdr, uint32_t *bitmap)
+static int decode_attr_error(struct xdr_stream *xdr, uint32_t *bitmap, int32_t *res)
 {
 	__be32 *p;
 
@@ -3109,7 +3109,7 @@
 		if (unlikely(!p))
 			goto out_overflow;
 		bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR;
-		return -be32_to_cpup(p);
+		*res = -be32_to_cpup(p);
 	}
 	return 0;
 out_overflow:
@@ -4070,6 +4070,7 @@
 	int status;
 	umode_t fmode = 0;
 	uint32_t type;
+	int32_t err;
 
 	status = decode_attr_type(xdr, bitmap, &type);
 	if (status < 0)
@@ -4095,13 +4096,12 @@
 		goto xdr_error;
 	fattr->valid |= status;
 
-	status = decode_attr_error(xdr, bitmap);
-	if (status == -NFS4ERR_WRONGSEC) {
-		nfs_fixup_secinfo_attributes(fattr, fh);
-		status = 0;
-	}
+	err = 0;
+	status = decode_attr_error(xdr, bitmap, &err);
 	if (status < 0)
 		goto xdr_error;
+	if (err == -NFS4ERR_WRONGSEC)
+		nfs_fixup_secinfo_attributes(fattr, fh);
 
 	status = decode_attr_filehandle(xdr, bitmap, fh);
 	if (status < 0)
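
The refactoring above separates two different kinds of failure: the return value of decode_attr_error() now reports only XDR decode problems, while the rdattr_error value itself comes back through the new *res argument, so a server-side NFS4ERR_WRONGSEC no longer masquerades as a decode failure. A standalone sketch of the same out-parameter split (all names and constants illustrative):

	#include <stdio.h>

	#define EIO       5
	#define EWRONGSEC 10016   /* stand-in for NFS4ERR_WRONGSEC */

	/* Returns 0 on a successful decode, -EIO on a malformed stream.
	 * The decoded per-entry error, if any, is stored in *res. */
	static int decode_attr_error(const int *stream, int len, int *res)
	{
		if (len < 1)
			return -EIO;    /* decode problem: caller must abort */
		*res = -stream[0];      /* decoded value: caller may act on it */
		return 0;
	}

	int main(void)
	{
		int wire[] = { EWRONGSEC };
		int err = 0;

		if (decode_attr_error(wire, 1, &err) < 0)
			return 1;                              /* bad XDR */
		if (err == -EWRONGSEC)
			printf("fix up secinfo attributes\n"); /* recoverable */
		return 0;
	}
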
@@ -4997,12 +4997,14 @@
 			     struct nfs4_channel_attrs *attrs)
 {
 	__be32 *p;
-	u32 nr_attrs;
+	u32 nr_attrs, val;
 
 	p = xdr_inline_decode(xdr, 28);
 	if (unlikely(!p))
 		goto out_overflow;
-	attrs->headerpadsz = be32_to_cpup(p++);
+	val = be32_to_cpup(p++);	/* headerpadsz */
+	if (val)
+		return -EINVAL;		/* no support for header padding yet */
 	attrs->max_rqst_sz = be32_to_cpup(p++);
 	attrs->max_resp_sz = be32_to_cpup(p++);
 	attrs->max_resp_sz_cached = be32_to_cpup(p++);
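
With header padding unsupported, the field is dropped from struct nfs4_channel_attrs altogether: the encoder always sends 0, and the decoder now fails the reply outright if the server asks for a non-zero pad. A tiny sketch of that "reject what we cannot honour" decode check (simplified, userspace, illustrative layout):

	#include <stdio.h>

	#define EINVAL 22

	/* Accept a channel-attribute word stream only if the header-padding
	 * field (word 0) is zero, since we implement no padding support. */
	static int decode_chan_attrs(const unsigned int *p, unsigned int *max_rqst)
	{
		if (p[0] != 0)
			return -EINVAL;   /* no support for header padding yet */
		*max_rqst = p[1];
		return 0;
	}

	int main(void)
	{
		unsigned int wire[] = { 0, 1049620 };
		unsigned int max_rqst;

		if (decode_chan_attrs(wire, &max_rqst) == 0)
			printf("max_rqst_sz = %u\n", max_rqst);
		return 0;
	}
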
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 9cf208d..8ff2ea3 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -108,7 +108,6 @@
 		de = n;
 	}
 
-	atomic_inc(&de->id_node.ref);
 	return de;
 }
 
@@ -1001,6 +1000,9 @@
 	if (!pnfs_generic_pg_test(pgio, prev, req))
 		return false;
 
+	if (pgio->pg_lseg == NULL)
+		return true;
+
 	return pgio->pg_count + req->wb_bytes <=
 			OBJIO_LSEG(pgio->pg_lseg)->max_io_size;
 }
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index dc3956c..1d06f8e 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -291,7 +291,7 @@
 	struct nfs_read_data *rdata;
 
 	state->status = status;
-	dprintk("%s: Begin status=%ld eof=%d\n", __func__, status, eof);
+	dprintk("%s: Begin status=%zd eof=%d\n", __func__, status, eof);
 	rdata = state->rpcdata;
 	rdata->task.tk_status = status;
 	if (status >= 0) {
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 7913961..0098557 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -204,7 +204,7 @@
 			TASK_UNINTERRUPTIBLE);
 }
 
-static bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req)
+bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc, struct nfs_page *prev, struct nfs_page *req)
 {
 	/*
 	 * FIXME: ideally we should be able to coalesce all requests
@@ -218,6 +218,7 @@
 
 	return desc->pg_count + req->wb_bytes <= desc->pg_bsize;
 }
+EXPORT_SYMBOL_GPL(nfs_generic_pg_test);
 
 /**
  * nfs_pageio_init - initialise a page io descriptor
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 8c1309d..29c0ca7f 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -634,14 +634,16 @@
 
 	spin_lock(&ino->i_lock);
 	lo = nfsi->layout;
-	if (!lo || !mark_matching_lsegs_invalid(lo, &tmp_list, NULL)) {
+	if (!lo) {
 		spin_unlock(&ino->i_lock);
-		dprintk("%s: no layout segments to return\n", __func__);
-		goto out;
+		dprintk("%s: no layout to return\n", __func__);
+		return status;
 	}
 	stateid = nfsi->layout->plh_stateid;
 	/* Reference matched in nfs4_layoutreturn_release */
 	get_layout_hdr(lo);
+	mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
+	lo->plh_block_lgets++;
 	spin_unlock(&ino->i_lock);
 	pnfs_free_lseg_list(&tmp_list);
 
@@ -650,6 +652,9 @@
 	lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
 	if (unlikely(lrp == NULL)) {
 		status = -ENOMEM;
+		set_bit(NFS_LAYOUT_RW_FAILED, &lo->plh_flags);
+		set_bit(NFS_LAYOUT_RO_FAILED, &lo->plh_flags);
+		put_layout_hdr(lo);
 		goto out;
 	}
 
@@ -887,7 +892,7 @@
 			ret = get_lseg(lseg);
 			break;
 		}
-		if (cmp_layout(range, &lseg->pls_range) > 0)
+		if (lseg->pls_range.offset > range->offset)
 			break;
 	}
 
@@ -1059,23 +1064,36 @@
 		gfp_flags = GFP_NOFS;
 	}
 
-	if (pgio->pg_count == prev->wb_bytes) {
+	if (pgio->pg_lseg == NULL) {
+		if (pgio->pg_count != prev->wb_bytes)
+			return true;
 		/* This is the first coalesce call for a series of nfs_pages */
 		pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
 						   prev->wb_context,
-						   req_offset(req),
+						   req_offset(prev),
 						   pgio->pg_count,
 						   access_type,
 						   gfp_flags);
-		return true;
+		if (pgio->pg_lseg == NULL)
+			return true;
 	}
 
-	if (pgio->pg_lseg &&
-	    req_offset(req) > end_offset(pgio->pg_lseg->pls_range.offset,
-					 pgio->pg_lseg->pls_range.length))
-		return false;
-
-	return true;
+	/*
+	 * Test if an nfs_page is fully contained in the pnfs_layout_range.
+	 * Note that this test makes several assumptions:
+	 * - that the previous nfs_page in the struct nfs_pageio_descriptor
+	 *   is known to lie within the range.
+	 * - that the nfs_page being tested is known to be contiguous with
+	 *   the previous nfs_page.
+	 * - that layout ranges are page aligned, so we only have to test
+	 *   the start offset of the request.
+	 *
+	 * Please also note that 'end_offset' is actually the offset of the
+	 * first byte that lies outside the pnfs_layout_range. FIXME?
+	 *
+	 */
+	return req_offset(req) < end_offset(pgio->pg_lseg->pls_range.offset,
+					 pgio->pg_lseg->pls_range.length);
 }
 EXPORT_SYMBOL_GPL(pnfs_generic_pg_test);
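
As the new comment notes, end_offset() names the first byte outside the range, so full containment of a page-aligned, contiguous request reduces to a single strict comparison on its start offset. A self-contained check of that arithmetic (illustrative types, not the kernel's):

	#include <stdint.h>
	#include <stdio.h>

	/* First byte *outside* a range starting at off with length len. */
	static uint64_t end_offset(uint64_t off, uint64_t len)
	{
		return off + len;
	}

	/* A request starting at req_off fits if it begins before the end;
	 * the caller guarantees contiguity with an in-range predecessor and
	 * page-aligned range boundaries, so the start offset suffices. */
	static int req_fits(uint64_t req_off, uint64_t range_off, uint64_t range_len)
	{
		return req_off < end_offset(range_off, range_len);
	}

	int main(void)
	{
		printf("%d\n", req_fits(8192, 0, 16384));   /* 1: inside */
		printf("%d\n", req_fits(16384, 0, 16384));  /* 0: first byte out */
		return 0;
	}
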
 
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 48d0a8e..96bf4e6 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -186,6 +186,7 @@
 /* pnfs_dev.c */
 struct nfs4_deviceid_node {
 	struct hlist_node		node;
+	struct hlist_node		tmpnode;
 	const struct pnfs_layoutdriver_type *ld;
 	const struct nfs_client		*nfs_client;
 	struct nfs4_deviceid		deviceid;
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
index c65e133..f0f8e1e 100644
--- a/fs/nfs/pnfs_dev.c
+++ b/fs/nfs/pnfs_dev.c
@@ -174,6 +174,7 @@
 			const struct nfs4_deviceid *id)
 {
 	INIT_HLIST_NODE(&d->node);
+	INIT_HLIST_NODE(&d->tmpnode);
 	d->ld = ld;
 	d->nfs_client = nfs_client;
 	d->deviceid = *id;
@@ -208,6 +209,7 @@
 
 	hlist_add_head_rcu(&new->node, &nfs4_deviceid_cache[hash]);
 	spin_unlock(&nfs4_deviceid_lock);
+	atomic_inc(&new->ref);
 
 	return new;
 }
@@ -238,24 +240,29 @@
 _deviceid_purge_client(const struct nfs_client *clp, long hash)
 {
 	struct nfs4_deviceid_node *d;
-	struct hlist_node *n, *next;
+	struct hlist_node *n;
 	HLIST_HEAD(tmp);
 
+	spin_lock(&nfs4_deviceid_lock);
 	rcu_read_lock();
 	hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[hash], node)
 		if (d->nfs_client == clp && atomic_read(&d->ref)) {
 			hlist_del_init_rcu(&d->node);
-			hlist_add_head(&d->node, &tmp);
+			hlist_add_head(&d->tmpnode, &tmp);
 		}
 	rcu_read_unlock();
+	spin_unlock(&nfs4_deviceid_lock);
 
 	if (hlist_empty(&tmp))
 		return;
 
 	synchronize_rcu();
-	hlist_for_each_entry_safe(d, n, next, &tmp, node)
+	while (!hlist_empty(&tmp)) {
+		d = hlist_entry(tmp.first, struct nfs4_deviceid_node, tmpnode);
+		hlist_del(&d->tmpnode);
 		if (atomic_dec_and_test(&d->ref))
 			d->ld->free_deviceid_node(d);
+	}
 }
 
 void
@@ -263,8 +270,8 @@
 {
 	long h;
 
-	spin_lock(&nfs4_deviceid_lock);
+	if (!(clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_MDS))
+		return;
 	for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++)
 		_deviceid_purge_client(clp, h);
-	spin_unlock(&nfs4_deviceid_lock);
 }
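
The second hlist_node added in pnfs.h matters here: until synchronize_rcu() returns, concurrent RCU readers may still be walking the cache via d->node, so entries being purged are collected on the temporary list through the separate d->tmpnode rather than by reusing the link that readers can still see (the cache unlink itself is the hlist_del_init_rcu() above). A minimal illustration of the two-link idea (plain pointers, no RCU machinery):

	#include <stddef.h>
	#include <stdio.h>

	struct entry {
		struct entry *cache_next;   /* link readers may still traverse */
		struct entry *tmp_next;     /* private link for the purge list */
		int id;
	};

	int main(void)
	{
		struct entry a = { NULL, NULL, 1 }, b = { &a, NULL, 2 };
		struct entry *cache = &b, *tmp = NULL, *d;

		/* Collect everything onto the purge list via tmp_next; the
		 * cache_next links stay intact for any in-flight reader
		 * (the real code unlinks them separately, RCU-safely). */
		for (d = cache; d; d = d->cache_next) {
			d->tmp_next = tmp;
			tmp = d;
		}
		/* ... after a grace period, release from the tmp list ... */
		for (d = tmp; d; d = d->tmp_next)
			printf("purging entry %d\n", d->id);
		return 0;
	}
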
diff --git a/include/linux/device.h b/include/linux/device.h
index c66111a..553fd37 100644
--- a/include/linux/device.h
+++ b/include/linux/device.h
@@ -654,13 +654,13 @@
 
 static inline void device_enable_async_suspend(struct device *dev)
 {
-	if (!dev->power.in_suspend)
+	if (!dev->power.is_prepared)
 		dev->power.async_suspend = true;
 }
 
 static inline void device_disable_async_suspend(struct device *dev)
 {
-	if (!dev->power.in_suspend)
+	if (!dev->power.is_prepared)
 		dev->power.async_suspend = false;
 }
 
diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h
index 4ecb7b1..d087c2e 100644
--- a/include/linux/jbd2.h
+++ b/include/linux/jbd2.h
@@ -1024,7 +1024,6 @@
 
 /* Filing buffers */
 extern void jbd2_journal_unfile_buffer(journal_t *, struct journal_head *);
-extern void __jbd2_journal_unfile_buffer(struct journal_head *);
 extern void __jbd2_journal_refile_buffer(struct journal_head *);
 extern void jbd2_journal_refile_buffer(journal_t *, struct journal_head *);
 extern void __jbd2_journal_file_buffer(struct journal_head *, transaction_t *, int);
@@ -1165,7 +1164,6 @@
  */
 struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh);
 struct journal_head *jbd2_journal_grab_journal_head(struct buffer_head *bh);
-void jbd2_journal_remove_journal_head(struct buffer_head *bh);
 void jbd2_journal_put_journal_head(struct journal_head *jh);
 
 /*
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 3a34e80..25311b3 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -92,6 +92,9 @@
 				   struct nfs_page *);
 extern	void nfs_pageio_complete(struct nfs_pageio_descriptor *desc);
 extern	void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *, pgoff_t);
+extern bool nfs_generic_pg_test(struct nfs_pageio_descriptor *desc,
+				struct nfs_page *prev,
+				struct nfs_page *req);
 extern  int nfs_wait_on_request(struct nfs_page *);
 extern	void nfs_unlock_request(struct nfs_page *req);
 extern	int nfs_set_page_tag_locked(struct nfs_page *req);
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 5e8444a..00848d8 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -158,7 +158,6 @@
 
 /* nfs41 sessions channel attributes */
 struct nfs4_channel_attrs {
-	u32			headerpadsz;
 	u32			max_rqst_sz;
 	u32			max_resp_sz;
 	u32			max_resp_sz_cached;
diff --git a/include/linux/pm.h b/include/linux/pm.h
index 3160648..411e4f4 100644
--- a/include/linux/pm.h
+++ b/include/linux/pm.h
@@ -425,7 +425,8 @@
 	pm_message_t		power_state;
 	unsigned int		can_wakeup:1;
 	unsigned int		async_suspend:1;
-	unsigned int		in_suspend:1;	/* Owned by the PM core */
+	bool			is_prepared:1;	/* Owned by the PM core */
+	bool			is_suspended:1;	/* Ditto */
 	spinlock_t		lock;
 #ifdef CONFIG_PM_SLEEP
 	struct list_head	entry;
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index f73c482..fe2d8e6 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -84,7 +84,8 @@
 #endif
 	unsigned char		tk_priority : 2,/* Task priority */
 				tk_garb_retry : 2,
-				tk_cred_retry : 2;
+				tk_cred_retry : 2,
+				tk_rebind_retry : 2;
 };
 #define tk_xprt			tk_client->cl_xprt
 
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index e09592d..5ce2b2f 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -26,7 +26,7 @@
 		__field(	umode_t, mode			)
 		__field(	uid_t,	uid			)
 		__field(	gid_t,	gid			)
-		__field(	blkcnt_t, blocks		)
+		__field(	__u64, blocks			)
 	),
 
 	TP_fast_assign(
@@ -40,9 +40,8 @@
 
 	TP_printk("dev %d,%d ino %lu mode 0%o uid %u gid %u blocks %llu",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  (unsigned long) __entry->ino,
-		  __entry->mode, __entry->uid, __entry->gid,
-		  (unsigned long long) __entry->blocks)
+		  (unsigned long) __entry->ino, __entry->mode,
+		  __entry->uid, __entry->gid, __entry->blocks)
 );
 
 TRACE_EVENT(ext4_request_inode,
@@ -178,7 +177,7 @@
 	TP_printk("dev %d,%d ino %lu new_size %lld",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
-		  (long long) __entry->new_size)
+		  __entry->new_size)
 );
 
 DECLARE_EVENT_CLASS(ext4__write_begin,
@@ -204,7 +203,7 @@
 		__entry->flags	= flags;
 	),
 
-	TP_printk("dev %d,%d ino %lu pos %llu len %u flags %u",
+	TP_printk("dev %d,%d ino %lu pos %lld len %u flags %u",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
 		  __entry->pos, __entry->len, __entry->flags)
@@ -248,7 +247,7 @@
 		__entry->copied	= copied;
 	),
 
-	TP_printk("dev %d,%d ino %lu pos %llu len %u copied %u",
+	TP_printk("dev %d,%d ino %lu pos %lld len %u copied %u",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
 		  __entry->pos, __entry->len, __entry->copied)
@@ -286,29 +285,6 @@
 	TP_ARGS(inode, pos, len, copied)
 );
 
-TRACE_EVENT(ext4_writepage,
-	TP_PROTO(struct inode *inode, struct page *page),
-
-	TP_ARGS(inode, page),
-
-	TP_STRUCT__entry(
-		__field(	dev_t,	dev			)
-		__field(	ino_t,	ino			)
-		__field(	pgoff_t, index			)
-
-	),
-
-	TP_fast_assign(
-		__entry->dev	= inode->i_sb->s_dev;
-		__entry->ino	= inode->i_ino;
-		__entry->index	= page->index;
-	),
-
-	TP_printk("dev %d,%d ino %lu page_index %lu",
-		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  (unsigned long) __entry->ino, __entry->index)
-);
-
 TRACE_EVENT(ext4_da_writepages,
 	TP_PROTO(struct inode *inode, struct writeback_control *wbc),
 
@@ -341,7 +317,7 @@
 	),
 
 	TP_printk("dev %d,%d ino %lu nr_to_write %ld pages_skipped %ld "
-		  "range_start %llu range_end %llu sync_mode %d"
+		  "range_start %lld range_end %lld sync_mode %d"
 		  "for_kupdate %d range_cyclic %d writeback_index %lu",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino, __entry->nr_to_write,
@@ -449,7 +425,14 @@
 	TP_printk("dev %d,%d ino %lu page_index %lu",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
-		  __entry->index)
+		  (unsigned long) __entry->index)
+);
+
+DEFINE_EVENT(ext4__page_op, ext4_writepage,
+
+	TP_PROTO(struct page *page),
+
+	TP_ARGS(page)
 );
 
 DEFINE_EVENT(ext4__page_op, ext4_readpage,
@@ -489,7 +472,7 @@
 	TP_printk("dev %d,%d ino %lu page_index %lu offset %lu",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
-		  __entry->index, __entry->offset)
+		  (unsigned long) __entry->index, __entry->offset)
 );
 
 TRACE_EVENT(ext4_discard_blocks,
@@ -562,12 +545,10 @@
 );
 
 TRACE_EVENT(ext4_mb_release_inode_pa,
-	TP_PROTO(struct super_block *sb,
-		 struct inode *inode,
-		 struct ext4_prealloc_space *pa,
+	TP_PROTO(struct ext4_prealloc_space *pa,
 		 unsigned long long block, unsigned int count),
 
-	TP_ARGS(sb, inode, pa, block, count),
+	TP_ARGS(pa, block, count),
 
 	TP_STRUCT__entry(
 		__field(	dev_t,	dev			)
@@ -578,8 +559,8 @@
 	),
 
 	TP_fast_assign(
-		__entry->dev		= sb->s_dev;
-		__entry->ino		= inode->i_ino;
+		__entry->dev		= pa->pa_inode->i_sb->s_dev;
+		__entry->ino		= pa->pa_inode->i_ino;
 		__entry->block		= block;
 		__entry->count		= count;
 	),
@@ -591,10 +572,9 @@
 );
 
 TRACE_EVENT(ext4_mb_release_group_pa,
-	TP_PROTO(struct super_block *sb,
-		 struct ext4_prealloc_space *pa),
+	TP_PROTO(struct ext4_prealloc_space *pa),
 
-	TP_ARGS(sb, pa),
+	TP_ARGS(pa),
 
 	TP_STRUCT__entry(
 		__field(	dev_t,	dev			)
@@ -604,7 +584,7 @@
 	),
 
 	TP_fast_assign(
-		__entry->dev		= sb->s_dev;
+		__entry->dev		= pa->pa_inode->i_sb->s_dev;
 		__entry->pa_pstart	= pa->pa_pstart;
 		__entry->pa_len		= pa->pa_len;
 	),
@@ -666,10 +646,10 @@
 		__field(	ino_t,	ino			)
 		__field(	unsigned int, flags		)
 		__field(	unsigned int, len		)
-		__field(	__u64,  logical			)
+		__field(	__u32,  logical			)
+		__field(	__u32,	lleft			)
+		__field(	__u32,	lright			)
 		__field(	__u64,	goal			)
-		__field(	__u64,	lleft			)
-		__field(	__u64,	lright			)
 		__field(	__u64,	pleft			)
 		__field(	__u64,	pright			)
 	),
@@ -687,17 +667,13 @@
 		__entry->pright	= ar->pright;
 	),
 
-	TP_printk("dev %d,%d ino %lu flags %u len %u lblk %llu goal %llu "
-		  "lleft %llu lright %llu pleft %llu pright %llu ",
+	TP_printk("dev %d,%d ino %lu flags %u len %u lblk %u goal %llu "
+		  "lleft %u lright %u pleft %llu pright %llu ",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  (unsigned long) __entry->ino,
-		  __entry->flags, __entry->len,
-		  (unsigned long long) __entry->logical,
-		  (unsigned long long) __entry->goal,
-		  (unsigned long long) __entry->lleft,
-		  (unsigned long long) __entry->lright,
-		  (unsigned long long) __entry->pleft,
-		  (unsigned long long) __entry->pright)
+		  (unsigned long) __entry->ino, __entry->flags,
+		  __entry->len, __entry->logical, __entry->goal,
+		  __entry->lleft, __entry->lright, __entry->pleft,
+		  __entry->pright)
 );
 
 TRACE_EVENT(ext4_allocate_blocks,
@@ -711,10 +687,10 @@
 		__field(	__u64,	block			)
 		__field(	unsigned int, flags		)
 		__field(	unsigned int, len		)
-		__field(	__u64,  logical			)
+		__field(	__u32,  logical			)
+		__field(	__u32,	lleft			)
+		__field(	__u32,	lright			)
 		__field(	__u64,	goal			)
-		__field(	__u64,	lleft			)
-		__field(	__u64,	lright			)
 		__field(	__u64,	pleft			)
 		__field(	__u64,	pright			)
 	),
@@ -733,17 +709,13 @@
 		__entry->pright	= ar->pright;
 	),
 
-	TP_printk("dev %d,%d ino %lu flags %u len %u block %llu lblk %llu "
-		  "goal %llu lleft %llu lright %llu pleft %llu pright %llu",
+	TP_printk("dev %d,%d ino %lu flags %u len %u block %llu lblk %u "
+		  "goal %llu lleft %u lright %u pleft %llu pright %llu",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  (unsigned long) __entry->ino,
-		  __entry->flags, __entry->len, __entry->block,
-		  (unsigned long long) __entry->logical,
-		  (unsigned long long) __entry->goal,
-		  (unsigned long long) __entry->lleft,
-		  (unsigned long long) __entry->lright,
-		  (unsigned long long) __entry->pleft,
-		  (unsigned long long) __entry->pright)
+		  (unsigned long) __entry->ino, __entry->flags,
+		  __entry->len, __entry->block, __entry->logical,
+		  __entry->goal, __entry->lleft, __entry->lright,
+		  __entry->pleft, __entry->pright)
 );
 
 TRACE_EVENT(ext4_free_blocks,
@@ -755,10 +727,10 @@
 	TP_STRUCT__entry(
 		__field(	dev_t,	dev			)
 		__field(	ino_t,	ino			)
-		__field(      umode_t, mode			)
+		__field(	umode_t, mode			)
 		__field(	__u64,	block			)
 		__field(	unsigned long,	count		)
-		__field(	 int,	flags			)
+		__field(	int,	flags			)
 	),
 
 	TP_fast_assign(
@@ -798,7 +770,7 @@
 		__entry->parent		= dentry->d_parent->d_inode->i_ino;
 	),
 
-	TP_printk("dev %d,%d ino %ld parent %ld datasync %d ",
+	TP_printk("dev %d,%d ino %lu parent %lu datasync %d ",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
 		  (unsigned long) __entry->parent, __entry->datasync)
@@ -821,7 +793,7 @@
 		__entry->dev		= inode->i_sb->s_dev;
 	),
 
-	TP_printk("dev %d,%d ino %ld ret %d",
+	TP_printk("dev %d,%d ino %lu ret %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
 		  __entry->ret)
@@ -1005,7 +977,7 @@
 		__entry->result_len	= len;
 	),
 
-	TP_printk("dev %d,%d inode %lu extent %u/%d/%u ",
+	TP_printk("dev %d,%d inode %lu extent %u/%d/%d ",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
 		  __entry->result_group, __entry->result_start,
@@ -1093,7 +1065,7 @@
 		  "allocated_meta_blocks %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
-		  __entry->mode,  (unsigned long long) __entry->i_blocks,
+		  __entry->mode, __entry->i_blocks,
 		  __entry->used_blocks, __entry->reserved_data_blocks,
 		  __entry->reserved_meta_blocks, __entry->allocated_meta_blocks)
 );
@@ -1127,7 +1099,7 @@
 		  "reserved_data_blocks %d reserved_meta_blocks %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
-		  __entry->mode, (unsigned long long) __entry->i_blocks,
+		  __entry->mode, __entry->i_blocks,
 		  __entry->md_needed, __entry->reserved_data_blocks,
 		  __entry->reserved_meta_blocks)
 );
@@ -1164,7 +1136,7 @@
 		  "allocated_meta_blocks %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
-		  __entry->mode, (unsigned long long) __entry->i_blocks,
+		  __entry->mode, __entry->i_blocks,
 		  __entry->freed_blocks, __entry->reserved_data_blocks,
 		  __entry->reserved_meta_blocks, __entry->allocated_meta_blocks)
 );
@@ -1239,14 +1211,15 @@
 		__entry->rw	= rw;
 	),
 
-	TP_printk("dev %d,%d ino %lu pos %llu len %lu rw %d",
+	TP_printk("dev %d,%d ino %lu pos %lld len %lu rw %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
-		  (unsigned long long) __entry->pos, __entry->len, __entry->rw)
+		  __entry->pos, __entry->len, __entry->rw)
 );
 
 TRACE_EVENT(ext4_direct_IO_exit,
-	TP_PROTO(struct inode *inode, loff_t offset, unsigned long len, int rw, int ret),
+	TP_PROTO(struct inode *inode, loff_t offset, unsigned long len,
+		 int rw, int ret),
 
 	TP_ARGS(inode, offset, len, rw, ret),
 
@@ -1268,10 +1241,10 @@
 		__entry->ret	= ret;
 	),
 
-	TP_printk("dev %d,%d ino %lu pos %llu len %lu rw %d ret %d",
+	TP_printk("dev %d,%d ino %lu pos %lld len %lu rw %d ret %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
-		  (unsigned long long) __entry->pos, __entry->len,
+		  __entry->pos, __entry->len,
 		  __entry->rw, __entry->ret)
 );
 
@@ -1296,15 +1269,15 @@
 		__entry->mode	= mode;
 	),
 
-	TP_printk("dev %d,%d ino %ld pos %llu len %llu mode %d",
+	TP_printk("dev %d,%d ino %lu pos %lld len %lld mode %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  (unsigned long) __entry->ino,
-		  (unsigned long long) __entry->pos,
-		  (unsigned long long) __entry->len, __entry->mode)
+		  (unsigned long) __entry->ino, __entry->pos,
+		  __entry->len, __entry->mode)
 );
 
 TRACE_EVENT(ext4_fallocate_exit,
-	TP_PROTO(struct inode *inode, loff_t offset, unsigned int max_blocks, int ret),
+	TP_PROTO(struct inode *inode, loff_t offset,
+		 unsigned int max_blocks, int ret),
 
 	TP_ARGS(inode, offset, max_blocks, ret),
 
@@ -1312,7 +1285,7 @@
 		__field(	ino_t,	ino			)
 		__field(	dev_t,	dev			)
 		__field(	loff_t,	pos			)
-		__field(	unsigned,	blocks		)
+		__field(	unsigned int,	blocks		)
 		__field(	int, 	ret			)
 	),
 
@@ -1324,10 +1297,10 @@
 		__entry->ret	= ret;
 	),
 
-	TP_printk("dev %d,%d ino %ld pos %llu blocks %d ret %d",
+	TP_printk("dev %d,%d ino %lu pos %lld blocks %u ret %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
-		  (unsigned long long) __entry->pos, __entry->blocks,
+		  __entry->pos, __entry->blocks,
 		  __entry->ret)
 );
 
@@ -1350,7 +1323,7 @@
 		__entry->dev		= dentry->d_inode->i_sb->s_dev;
 	),
 
-	TP_printk("dev %d,%d ino %ld size %lld parent %ld",
+	TP_printk("dev %d,%d ino %lu size %lld parent %lu",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino, __entry->size,
 		  (unsigned long) __entry->parent)
@@ -1373,7 +1346,7 @@
 		__entry->ret		= ret;
 	),
 
-	TP_printk("dev %d,%d ino %ld ret %d",
+	TP_printk("dev %d,%d ino %lu ret %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
 		  __entry->ret)
@@ -1387,7 +1360,7 @@
 	TP_STRUCT__entry(
 		__field(	ino_t,  	ino		)
 		__field(	dev_t,  	dev		)
-		__field(	blkcnt_t,	blocks		)
+		__field(	__u64,		blocks		)
 	),
 
 	TP_fast_assign(
@@ -1396,9 +1369,9 @@
 		__entry->blocks	= inode->i_blocks;
 	),
 
-	TP_printk("dev %d,%d ino %lu blocks %lu",
+	TP_printk("dev %d,%d ino %lu blocks %llu",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
-		  (unsigned long) __entry->ino, (unsigned long) __entry->blocks)
+		  (unsigned long) __entry->ino, __entry->blocks)
 );
 
 DEFINE_EVENT(ext4__truncate, ext4_truncate_enter,
@@ -1417,7 +1390,7 @@
 
 DECLARE_EVENT_CLASS(ext4__map_blocks_enter,
 	TP_PROTO(struct inode *inode, ext4_lblk_t lblk,
-		 unsigned len, unsigned flags),
+		 unsigned int len, unsigned int flags),
 
 	TP_ARGS(inode, lblk, len, flags),
 
@@ -1425,8 +1398,8 @@
 		__field(	ino_t,  	ino		)
 		__field(	dev_t,  	dev		)
 		__field(	ext4_lblk_t,	lblk		)
-		__field(	unsigned,	len		)
-		__field(	unsigned,	flags		)
+		__field(	unsigned int,	len		)
+		__field(	unsigned int,	flags		)
 	),
 
 	TP_fast_assign(
@@ -1440,7 +1413,7 @@
 	TP_printk("dev %d,%d ino %lu lblk %u len %u flags %u",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
-		  (unsigned) __entry->lblk, __entry->len, __entry->flags)
+		  __entry->lblk, __entry->len, __entry->flags)
 );
 
 DEFINE_EVENT(ext4__map_blocks_enter, ext4_ext_map_blocks_enter,
@@ -1459,7 +1432,7 @@
 
 DECLARE_EVENT_CLASS(ext4__map_blocks_exit,
 	TP_PROTO(struct inode *inode, ext4_lblk_t lblk,
-		 ext4_fsblk_t pblk, unsigned len, int ret),
+		 ext4_fsblk_t pblk, unsigned int len, int ret),
 
 	TP_ARGS(inode, lblk, pblk, len, ret),
 
@@ -1468,7 +1441,7 @@
 		__field(	dev_t,		dev		)
 		__field(	ext4_lblk_t,	lblk		)
 		__field(	ext4_fsblk_t,	pblk		)
-		__field(	unsigned,	len		)
+		__field(	unsigned int,	len		)
 		__field(	int,		ret		)
 	),
 
@@ -1484,7 +1457,7 @@
 	TP_printk("dev %d,%d ino %lu lblk %u pblk %llu len %u ret %d",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
-		  (unsigned) __entry->lblk, (unsigned long long) __entry->pblk,
+		  __entry->lblk, __entry->pblk,
 		  __entry->len, __entry->ret)
 );
 
@@ -1524,7 +1497,7 @@
 	TP_printk("dev %d,%d ino %lu lblk %u pblk %llu",
 		  MAJOR(__entry->dev), MINOR(__entry->dev),
 		  (unsigned long) __entry->ino,
-		  (unsigned) __entry->lblk, (unsigned long long) __entry->pblk)
+		  __entry->lblk, __entry->pblk)
 );
 
 TRACE_EVENT(ext4_load_inode,
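
Most of the tracepoint churn above is making the printk format agree with the stored field type: loff_t is a signed 64-bit quantity and wants %lld, __u64 wants %llu, and variable-width types such as blkcnt_t (whose size depends on CONFIG_LBDAF) are stored as fixed __u64 fields so no cast is needed at print time. A compact userspace demonstration of the matching rules (values invented):

	#include <stdio.h>

	int main(void)
	{
		long long pos = -4096;          /* loff_t is signed: use %lld  */
		unsigned long long blocks = 8;  /* __u64 is unsigned: use %llu */
		unsigned long ino = 12;         /* ino_t printed via a cast    */

		printf("ino %lu pos %lld blocks %llu\n", ino, pos, blocks);
		return 0;
	}
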
diff --git a/kernel/power/user.c b/kernel/power/user.c
index 7d02d33..42ddbc6 100644
--- a/kernel/power/user.c
+++ b/kernel/power/user.c
@@ -113,8 +113,10 @@
 		if (error)
 			pm_notifier_call_chain(PM_POST_RESTORE);
 	}
-	if (error)
+	if (error) {
+		free_basic_memory_bitmaps();
 		atomic_inc(&snapshot_device_available);
+	}
 	data->frozen = 0;
 	data->ready = 0;
 	data->platform_support = 0;
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 02159c7..c46887b 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -498,7 +498,9 @@
 	 * The node we allocated has no zone fallback lists. For avoiding
 	 * to access not-initialized zonelist, build here.
 	 */
+	mutex_lock(&zonelists_mutex);
 	build_all_zonelists(NULL);
+	mutex_unlock(&zonelists_mutex);
 
 	return pgdat;
 }
@@ -521,7 +523,7 @@
 
 	lock_memory_hotplug();
 	pgdat = hotadd_new_pgdat(nid, 0);
-	if (pgdat) {
+	if (!pgdat) {
 		ret = -ENOMEM;
 		goto out;
 	}
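
The hotadd_new_pgdat() check had its sense inverted: the old code treated a successful allocation as the failure case. The corrected shape is the usual allocate-test-bail pattern, sketched standalone with invented names:

	#include <stdio.h>
	#include <stdlib.h>

	#define ENOMEM 12

	static int add_memory_like(void)
	{
		int ret = 0;
		void *pgdat = malloc(64);   /* stand-in for hotadd_new_pgdat() */

		if (!pgdat) {               /* failure is the NULL result */
			ret = -ENOMEM;
			goto out;
		}
		/* ... use pgdat ... */
		free(pgdat);
	out:
		return ret;
	}

	int main(void)
	{
		printf("ret = %d\n", add_memory_like());
		return 0;
	}
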
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 339ba64..5daf6cc 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -577,13 +577,13 @@
 	}
 	inode = &gss_msg->inode->vfs_inode;
 	for (;;) {
-		prepare_to_wait(&gss_msg->waitqueue, &wait, TASK_INTERRUPTIBLE);
+		prepare_to_wait(&gss_msg->waitqueue, &wait, TASK_KILLABLE);
 		spin_lock(&inode->i_lock);
 		if (gss_msg->ctx != NULL || gss_msg->msg.errno < 0) {
 			break;
 		}
 		spin_unlock(&inode->i_lock);
-		if (signalled()) {
+		if (fatal_signal_pending(current)) {
 			err = -ERESTARTSYS;
 			goto out_intr;
 		}
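
Switching the wait to TASK_KILLABLE and testing fatal_signal_pending() means an upcall blocked here ignores ordinary signals but can still be killed outright. The open-coded loop is, in spirit, what the generic wait_event_killable() helper provides; a hedged kernel-style sketch of that shape (not the patch's actual code, and it omits the i_lock the real loop holds around the condition):

	/* Sleep in TASK_KILLABLE until the context arrives or the upcall
	 * fails, returning -ERESTARTSYS only on a fatal signal. */
	static int wait_for_gss_ctx(struct gss_upcall_msg *gss_msg)
	{
		return wait_event_killable(gss_msg->waitqueue,
					   gss_msg->ctx != NULL ||
					   gss_msg->msg.errno < 0);
	}
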
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index b84d739..8c91415 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1061,7 +1061,7 @@
 
 	dprintk("RPC: %5u rpc_buffer allocation failed\n", task->tk_pid);
 
-	if (RPC_IS_ASYNC(task) || !signalled()) {
+	if (RPC_IS_ASYNC(task) || !fatal_signal_pending(current)) {
 		task->tk_action = call_allocate;
 		rpc_delay(task, HZ>>4);
 		return;
@@ -1175,6 +1175,9 @@
 			status = -EOPNOTSUPP;
 			break;
 		}
+		if (task->tk_rebind_retry == 0)
+			break;
+		task->tk_rebind_retry--;
 		rpc_delay(task, 3*HZ);
 		goto retry_timeout;
 	case -ETIMEDOUT:
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 6b43ee7..a27406b 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -792,6 +792,7 @@
 	/* Initialize retry counters */
 	task->tk_garb_retry = 2;
 	task->tk_cred_retry = 2;
+	task->tk_rebind_retry = 2;
 
 	task->tk_priority = task_setup_data->priority - RPC_PRIORITY_LOW;
 	task->tk_owner = current->tgid;
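
tk_rebind_retry is a two-bit counter seeded to 2, so an EPFNOSUPPORT answer from the portmapper is retried a bounded number of times instead of looping forever. The pattern, reduced to standalone C (the 3*HZ delay of the real code is elided):

	#include <stdio.h>

	struct task {
		unsigned char rebind_retry : 2;   /* small bounded counter */
	};

	/* Returns 1 to retry after a delay, 0 to give up for good. */
	static int bind_status_retry(struct task *t)
	{
		if (t->rebind_retry == 0)
			return 0;         /* budget exhausted: fail the call */
		t->rebind_retry--;
		return 1;                 /* e.g. rpc_delay(task, 3*HZ); retry */
	}

	int main(void)
	{
		struct task t = { .rebind_retry = 2 };
		int attempt = 0;

		while (bind_status_retry(&t))
			printf("retry %d\n", ++attempt);
		printf("giving up\n");
		return 0;
	}
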
diff --git a/security/keys/request_key.c b/security/keys/request_key.c
index 8e319a4..8246532 100644
--- a/security/keys/request_key.c
+++ b/security/keys/request_key.c
@@ -469,7 +469,7 @@
 	} else if (ret == -EINPROGRESS) {
 		ret = 0;
 	} else {
-		key = ERR_PTR(ret);
+		goto couldnt_alloc_key;
 	}
 
 	key_put(dest_keyring);
@@ -479,6 +479,7 @@
 construction_failed:
 	key_negate_and_link(key, key_negative_timeout, NULL, NULL);
 	key_put(key);
+couldnt_alloc_key:
 	key_put(dest_keyring);
 	kleave(" = %d", ret);
 	return ERR_PTR(ret);
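
The fix replaces the silent "key = ERR_PTR(ret)" assignment with a jump to a dedicated label, so the failure path releases only what was actually taken (the destination keyring) and skips the negate-and-put steps meant for a key under construction. The staged-unwind idiom it relies on, in standalone form with invented resources:

	#include <stdio.h>
	#include <stdlib.h>

	#define ENOMEM 12

	static int do_request(void)
	{
		int ret;
		void *keyring, *key;

		keyring = malloc(16);            /* taken first */
		if (!keyring)
			return -ENOMEM;

		key = malloc(16);                /* may fail after keyring */
		if (!key) {
			ret = -ENOMEM;
			goto couldnt_alloc_key;  /* release keyring only */
		}

		ret = 0;
		free(key);                       /* normal path: drop both */
	couldnt_alloc_key:
		free(keyring);
		return ret;
	}

	int main(void)
	{
		printf("ret = %d\n", do_request());
		return 0;
	}
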